diff --git a/common/polystar/common/image_pipeline/preprocessors/normalise.py b/common/polystar/common/image_pipeline/preprocessors/normalise.py new file mode 100644 index 0000000000000000000000000000000000000000..a00c8d0d31d6445b476e88ad5fea20eb7bf09e3d --- /dev/null +++ b/common/polystar/common/image_pipeline/preprocessors/normalise.py @@ -0,0 +1,7 @@ +from polystar.common.models.image import Image +from polystar.common.pipeline.pipe_abc import PipeABC + + +class Normalise(PipeABC): + def transform_single(self, image: Image) -> Image: + return image / 255 diff --git a/common/polystar/common/image_pipeline/preprocessors/resize.py b/common/polystar/common/image_pipeline/preprocessors/resize.py new file mode 100644 index 0000000000000000000000000000000000000000..6afbc2b112b787659de43c8575d61e806d969ae7 --- /dev/null +++ b/common/polystar/common/image_pipeline/preprocessors/resize.py @@ -0,0 +1,14 @@ +from typing import Tuple + +from cv2.cv2 import resize + +from polystar.common.models.image import Image +from polystar.common.pipeline.pipe_abc import PipeABC + + +class Resize(PipeABC): + def __init__(self, size: Tuple[int, int]): + self.size = size + + def transform_single(self, image: Image) -> Image: + return resize(image, self.size) diff --git a/common/polystar/common/models/image.py b/common/polystar/common/models/image.py index 4d598f562cd21be2b1cbb8050e2c9860a5df1447..29a0b13b3f7af5e2d689098932e252af79f446a4 100644 --- a/common/polystar/common/models/image.py +++ b/common/polystar/common/models/image.py @@ -1,6 +1,6 @@ from dataclasses import dataclass, field from pathlib import Path -from typing import Iterable +from typing import Iterable, List import cv2 import numpy as np @@ -38,3 +38,7 @@ def load_images_in_directory( def save_image(image: Image, image_path: Path, conversion: int = cv2.COLOR_RGB2BGR): image_path.parent.mkdir(exist_ok=True, parents=True) cv2.imwrite(str(image_path), cv2.cvtColor(image, conversion)) + + +def file_images_to_images(file_images: Iterable[FileImage]) -> List[Image]: + return [np.asarray(file_image) for file_image in file_images] diff --git a/common/polystar/common/pipeline/classification/classification_pipeline.py b/common/polystar/common/pipeline/classification/classification_pipeline.py index 074cc15e5942abe0593f547939c64747224b1ba5..99c85f5000881185af3362c2856a856f06fcee71 100644 --- a/common/polystar/common/pipeline/classification/classification_pipeline.py +++ b/common/polystar/common/pipeline/classification/classification_pipeline.py @@ -2,7 +2,7 @@ from abc import ABC from enum import IntEnum from typing import ClassVar, Generic, List, Sequence, Tuple, TypeVar -from numpy import asarray, ndarray +from numpy import asarray, ndarray, pad from polystar.common.pipeline.classification.classifier_abc import ClassifierABC from polystar.common.pipeline.pipe_abc import IT, PipeABC @@ -29,6 +29,13 @@ class ClassificationPipeline(Pipeline, Generic[IT, EnumT], ABC): def predict(self, x: Sequence[IT]) -> List[EnumT]: return self.predict_proba_and_classes(x)[1] + def predict_proba(self, x: Sequence[IT]) -> ndarray: + proba = super().predict_proba(x) + missing_classes = self.classifier.n_classes - proba.shape[1] + if not missing_classes: + return proba + return pad(proba, ((0, 0), (0, missing_classes))) + def predict_proba_and_classes(self, x: Sequence[IT]) -> Tuple[ndarray, List[EnumT]]: proba = asarray(self.predict_proba(x)) indices = proba.argmax(axis=1) diff --git a/common/polystar/common/utils/iterable_utils.py b/common/polystar/common/utils/iterable_utils.py index a0046880057618448b223fab0f64ab231619546a..01bc2da41ef0b4ba3894a14cdf989c745c6829fe 100644 --- a/common/polystar/common/utils/iterable_utils.py +++ b/common/polystar/common/utils/iterable_utils.py @@ -1,4 +1,6 @@ -from typing import Iterable +from collections import defaultdict +from itertools import chain +from typing import Callable, Dict, Iterable, List, TypeVar from more_itertools import ilen @@ -8,3 +10,20 @@ def smart_len(it: Iterable) -> int: return len(it) except AttributeError: return ilen(it) + + +T = TypeVar("T") + + +def flatten(it: Iterable[Iterable[T]]) -> List[T]: + return list(chain.from_iterable(it)) + + +U = TypeVar("U") + + +def group_by(it: Iterable[T], key: Callable[[T], U]) -> Dict[U, List[T]]: + rv = defaultdict(list) + for item in it: + rv[key(item)].append(item) + return rv diff --git a/common/polystar/common/utils/markdown.py b/common/polystar/common/utils/markdown.py index 79a9d8360d4fd0e6d6c0ff444c4246a7bcafda04..3997375130872f443774927e0585f9e2d76fc552 100644 --- a/common/polystar/common/utils/markdown.py +++ b/common/polystar/common/utils/markdown.py @@ -1,6 +1,7 @@ from pathlib import Path -from typing import TextIO, Iterable, Any +from typing import Any, Iterable, TextIO +from matplotlib.figure import Figure from pandas import DataFrame from tabulate import tabulate @@ -35,7 +36,11 @@ class MarkdownFile: self.paragraph(f"") return self + def figure(self, figure: Figure, name: str, alt: str = "img"): + figure.savefig(self.markdown_path.parent / name) + return self.image(name, alt) + def table(self, data: DataFrame) -> "MarkdownFile": - self.file.write(tabulate(data, tablefmt="pipe", headers="keys")) + self.file.write(tabulate(data, tablefmt="pipe", headers="keys").replace(".0 ", " ")) self.file.write("\n\n") return self diff --git a/common/research/common/datasets/image_dataset.py b/common/research/common/datasets/image_dataset.py index 13bb5a584cb817f7bcb53da811b70f2e10b3e8d8..9378439106a6d241143240205dbb9674ca716991 100644 --- a/common/research/common/datasets/image_dataset.py +++ b/common/research/common/datasets/image_dataset.py @@ -1,6 +1,6 @@ from pathlib import Path -from polystar.common.models.image import Image +from polystar.common.models.image import FileImage, Image from research.common.datasets.dataset import Dataset from research.common.datasets.lazy_dataset import LazyDataset, TargetT @@ -9,3 +9,6 @@ FileDataset = Dataset[Path, TargetT] LazyImageDataset = LazyDataset[Image, TargetT] ImageDataset = Dataset[Image, TargetT] + +LazyFileImageDataset = LazyDataset[FileImage, TargetT] +FileImageDataset = Dataset[FileImage, TargetT] diff --git a/dataset/dji_roco/robomaster_Final Tournament/digits/.changes b/dataset/dji_roco/robomaster_Final Tournament/digits/.changes index 35d75bb3ff7e3bcf5ffc930deef3313e092ba0fe..094f578582ac0df9123e392b0be3aec31d5a695b 100644 Binary files a/dataset/dji_roco/robomaster_Final Tournament/digits/.changes and b/dataset/dji_roco/robomaster_Final Tournament/digits/.changes differ diff --git a/robots-at-robots/research/robots_at_robots/armor_color/armor_color_benchmarker.py b/robots-at-robots/research/robots_at_robots/armor_color/armor_color_benchmarker.py new file mode 100644 index 0000000000000000000000000000000000000000..a01bf0de068690b9dfb1582a668874aa95739365 --- /dev/null +++ b/robots-at-robots/research/robots_at_robots/armor_color/armor_color_benchmarker.py @@ -0,0 +1,20 @@ +from typing import List + +from polystar.common.models.object import ArmorColor +from research.common.datasets.roco.roco_dataset_builder import ROCODatasetBuilder +from research.robots_at_robots.armor_color.armor_color_dataset import make_armor_color_dataset_generator +from research.robots_at_robots.evaluation.benchmark import make_armor_value_benchmarker + + +def make_armor_color_benchmarker( + train_roco_datasets: List[ROCODatasetBuilder], test_roco_datasets: List[ROCODatasetBuilder], experiment_name: str +): + dataset_generator = make_armor_color_dataset_generator() + return make_armor_value_benchmarker( + train_roco_datasets=train_roco_datasets, + test_roco_datasets=test_roco_datasets, + evaluation_project="armor-color", + experiment_name=experiment_name, + classes=list(ArmorColor), + dataset_generator=dataset_generator, + ) diff --git a/robots-at-robots/research/robots_at_robots/armor_color/armor_color_pipeline_reporter_factory.py b/robots-at-robots/research/robots_at_robots/armor_color/armor_color_pipeline_reporter_factory.py deleted file mode 100644 index a24ad1a25b129f19b44c2c5cb2f6ec1af498550d..0000000000000000000000000000000000000000 --- a/robots-at-robots/research/robots_at_robots/armor_color/armor_color_pipeline_reporter_factory.py +++ /dev/null @@ -1,24 +0,0 @@ -from typing import List - -from research.common.datasets.roco.roco_dataset_builder import ROCODatasetBuilder -from research.robots_at_robots.armor_color.armor_color_dataset import make_armor_color_dataset_generator -from research.robots_at_robots.evaluation.image_pipeline_evaluation_reporter import ImagePipelineEvaluationReporter -from research.robots_at_robots.evaluation.image_pipeline_evaluator import ImagePipelineEvaluator - - -class ArmorColorPipelineReporterFactory: - @staticmethod - def from_roco_datasets( - train_roco_datasets: List[ROCODatasetBuilder], - test_roco_datasets: List[ROCODatasetBuilder], - experiment_name: str, - ): - return ImagePipelineEvaluationReporter( - evaluator=ImagePipelineEvaluator( - train_roco_datasets=train_roco_datasets, - test_roco_datasets=test_roco_datasets, - image_dataset_generator=make_armor_color_dataset_generator(), - ), - evaluation_project="armor-color", - experiment_name=experiment_name, - ) diff --git a/robots-at-robots/research/robots_at_robots/armor_color/baseline_experiments.py b/robots-at-robots/research/robots_at_robots/armor_color/benchmark.py similarity index 85% rename from robots-at-robots/research/robots_at_robots/armor_color/baseline_experiments.py rename to robots-at-robots/research/robots_at_robots/armor_color/benchmark.py index 703fefe2757b2afbf2dd9e7cffc620e0b139824e..1ac6f2b9ce966d91659b005f6206fa35f80c7647 100644 --- a/robots-at-robots/research/robots_at_robots/armor_color/baseline_experiments.py +++ b/robots-at-robots/research/robots_at_robots/armor_color/benchmark.py @@ -13,9 +13,7 @@ from polystar.common.pipeline.classification.random_model import RandomClassifie from polystar.common.pipeline.classification.rule_based_classifier import RuleBasedClassifierABC from polystar.common.pipeline.pipe_abc import PipeABC from research.common.datasets.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo -from research.robots_at_robots.armor_color.armor_color_pipeline_reporter_factory import ( - ArmorColorPipelineReporterFactory, -) +from research.robots_at_robots.armor_color.armor_color_benchmarker import make_armor_color_benchmarker class ArmorColorPipeline(ClassificationPipeline): @@ -38,20 +36,20 @@ class RedBlueComparisonClassifier(RuleBasedClassifierABC): if __name__ == "__main__": logging.getLogger().setLevel("INFO") - reporter = ArmorColorPipelineReporterFactory.from_roco_datasets( - train_roco_datasets=[ + _benchmarker = make_armor_color_benchmarker( + [ ROCODatasetsZoo.TWITCH.T470150052, ROCODatasetsZoo.TWITCH.T470152289, ROCODatasetsZoo.TWITCH.T470149568, ROCODatasetsZoo.TWITCH.T470151286, ], - test_roco_datasets=[ + [ ROCODatasetsZoo.TWITCH.T470152838, ROCODatasetsZoo.TWITCH.T470153081, ROCODatasetsZoo.TWITCH.T470158483, ROCODatasetsZoo.TWITCH.T470152730, ], - experiment_name="test", + "test", ) red_blue_comparison_pipeline = ArmorColorPipeline.from_pipes( @@ -62,4 +60,4 @@ if __name__ == "__main__": [RGB2HSV(), Histogram2D(), LogisticRegression()], name="hsv-hist-lr", ) - reporter.report([random_pipeline, red_blue_comparison_pipeline, hsv_hist_lr_pipeline]) + _benchmarker.benchmark([random_pipeline, red_blue_comparison_pipeline, hsv_hist_lr_pipeline]) diff --git a/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_benchmarker.py b/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_benchmarker.py new file mode 100644 index 0000000000000000000000000000000000000000..f4792c43adcec2192b74a2457662cae09d028681 --- /dev/null +++ b/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_benchmarker.py @@ -0,0 +1,20 @@ +from typing import List + +from polystar.common.models.object import ArmorDigit +from research.common.datasets.roco.roco_dataset_builder import ROCODatasetBuilder +from research.robots_at_robots.armor_digit.armor_digit_dataset import make_armor_digit_dataset_generator +from research.robots_at_robots.evaluation.benchmark import make_armor_value_benchmarker + + +def make_armor_digit_benchmarker( + train_roco_datasets: List[ROCODatasetBuilder], test_roco_datasets: List[ROCODatasetBuilder], experiment_name: str +): + dataset_generator = make_armor_digit_dataset_generator() + return make_armor_value_benchmarker( + train_roco_datasets=train_roco_datasets, + test_roco_datasets=test_roco_datasets, + evaluation_project="armor-digit", + experiment_name=experiment_name, + classes=list(ArmorDigit), + dataset_generator=dataset_generator, + ) diff --git a/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_pipeline_reporter_factory.py b/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_pipeline_reporter_factory.py deleted file mode 100644 index 6c5f9a02c995bc24bfaa7399a148136740e68dcc..0000000000000000000000000000000000000000 --- a/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_pipeline_reporter_factory.py +++ /dev/null @@ -1,24 +0,0 @@ -from typing import List - -from research.common.datasets.roco.roco_dataset_builder import ROCODatasetBuilder -from research.robots_at_robots.armor_digit.armor_digit_dataset import make_armor_digit_dataset_generator -from research.robots_at_robots.evaluation.image_pipeline_evaluation_reporter import ImagePipelineEvaluationReporter -from research.robots_at_robots.evaluation.image_pipeline_evaluator import ImagePipelineEvaluator - - -class ArmorDigitPipelineReporterFactory: - @staticmethod - def from_roco_datasets( - train_roco_datasets: List[ROCODatasetBuilder], - test_roco_datasets: List[ROCODatasetBuilder], - experiment_name: str, - ): - return ImagePipelineEvaluationReporter( - evaluator=ImagePipelineEvaluator( - train_roco_datasets=train_roco_datasets, - test_roco_datasets=test_roco_datasets, - image_dataset_generator=make_armor_digit_dataset_generator(), - ), - evaluation_project="armor-digit", - experiment_name=experiment_name, - ) diff --git a/robots-at-robots/research/robots_at_robots/armor_digit/benchmark.py b/robots-at-robots/research/robots_at_robots/armor_digit/benchmark.py index 757247e1860a957a00565137351d10a0fc069749..1b48d0ea6c6c17c276ad28d3e1687a2446f42a51 100644 --- a/robots-at-robots/research/robots_at_robots/armor_digit/benchmark.py +++ b/robots-at-robots/research/robots_at_robots/armor_digit/benchmark.py @@ -3,8 +3,6 @@ import warnings from pathlib import Path from typing import List, Sequence, Tuple -import seaborn as sns -from cv2.cv2 import resize from keras_preprocessing.image import ImageDataGenerator from numpy import asarray from tensorflow_core.python.keras import Input, Model, Sequential @@ -15,16 +13,15 @@ from tensorflow_core.python.keras.optimizer_v2.adam import Adam from tensorflow_core.python.keras.optimizer_v2.gradient_descent import SGD from tensorflow_core.python.keras.utils.np_utils import to_categorical +from polystar.common.image_pipeline.preprocessors.normalise import Normalise +from polystar.common.image_pipeline.preprocessors.resize import Resize from polystar.common.models.image import Image from polystar.common.models.object import ArmorDigit from polystar.common.pipeline.classification.classification_pipeline import ClassificationPipeline from polystar.common.pipeline.classification.classifier_abc import ClassifierABC from polystar.common.pipeline.classification.random_model import RandomClassifier -from polystar.common.pipeline.pipe_abc import PipeABC from research.common.datasets.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo -from research.robots_at_robots.armor_digit.armor_digit_pipeline_reporter_factory import ( - ArmorDigitPipelineReporterFactory, -) +from research.robots_at_robots.armor_digit.armor_digit_benchmarker import make_armor_digit_benchmarker class ArmorDigitPipeline(ClassificationPipeline): @@ -45,14 +42,14 @@ class KerasClassifier(ClassifierABC): return ImageDataGenerator(rotation_range=45, zoom_range=[0.8, 1]) # brightness_range=[0.7, 1.4] def fit(self, images: List[Image], labels: List[int]) -> "KerasClassifier": - n_val: int = 540 # FIXME + n_val: int = 371 # FIXME images = asarray(images) labels = to_categorical(asarray(labels), 5) # FIXME train_images, train_labels = images[:-n_val], labels[:-n_val] val_images, val_labels = images[-n_val:], labels[-n_val:] batch_size = 32 # FIXME - train_generator = self.train_data_gen.flow(train_images, train_labels, batch_size) + train_generator = self.train_data_gen.flow(train_images, train_labels, batch_size=batch_size, shuffle=True) self.model.fit( x=train_generator, @@ -100,19 +97,6 @@ class CNN(KerasClassifier): ) -class Resize(PipeABC): - def __init__(self, size: Tuple[int, int]): - self.size = size - - def transform_single(self, image: Image) -> Image: - return resize(image, self.size) - - -class Normalise(PipeABC): - def transform_single(self, image: Image) -> Image: - return image / 255 - - def make_digits_cnn_pipeline( input_size: int, conv_blocks: Sequence[Sequence[int]], report_dir: Path, with_data_augmentation: bool, lr: float ) -> ArmorDigitPipeline: @@ -186,9 +170,7 @@ if __name__ == "__main__": logging.getLogger("tensorflow").setLevel("ERROR") warnings.filterwarnings("ignore") - sns.set_style() - - reporter = ArmorDigitPipelineReporterFactory.from_roco_datasets( + _benchmarker = make_armor_digit_benchmarker( train_roco_datasets=[ # ROCODatasetsZoo.DJI.CENTRAL_CHINA, # ROCODatasetsZoo.DJI.FINAL, @@ -200,32 +182,39 @@ if __name__ == "__main__": ROCODatasetsZoo.TWITCH.T470152289, ], test_roco_datasets=[ - # ROCODatasetsZoo.TWITCH.T470152838, ROCODatasetsZoo.TWITCH.T470153081, ROCODatasetsZoo.TWITCH.T470158483, ROCODatasetsZoo.TWITCH.T470152730, ], - experiment_name="data_augm", + experiment_name="test-benchmarker", ) random_pipeline = ArmorDigitPipeline.from_pipes([RandomClassifier()], name="random") + report_dir = _benchmarker.reporter.report_dir cnn_pipelines = [ - make_digits_cnn_pipeline(32, ((32, 32), (64, 64)), reporter.report_dir, with_data_augmentation=True, lr=lr) - for lr in (1e-2, 5e-3, 2e-3, 1e-3, 5e-4, 2e-4) - ] + [ make_digits_cnn_pipeline( - 64, ((32,), (64, 64), (64, 64)), reporter.report_dir, with_data_augmentation=False, lr=lr + 32, ((32, 32), (64, 64)), report_dir, with_data_augmentation=with_data_augmentation, lr=lr, ) - for lr in (5e-2, 2e-2, 1e-2, 5e-3, 2e-3, 1e-3) + for with_data_augmentation in [False] + for lr in [2.5e-2, 1.6e-2, 1e-2, 6.3e-3, 4e-4] ] + # cnn_pipelines = [ + # make_digits_cnn_pipeline( + # 64, ((32,), (64, 64), (64, 64)), reporter.report_dir, with_data_augmentation=True, lr=lr + # ) + # for with_data_augmentation in [True, False] + # for lr in (5.6e-2, 3.1e-2, 1.8e-2, 1e-2, 5.6e-3, 3.1e-3, 1.8e-3, 1e-3) + # ] vgg16_pipelines = [ - make_vgg16_pipeline(reporter.report_dir, input_size=32, with_data_augmentation=True, lr=lr) + make_vgg16_pipeline(report_dir, input_size=32, with_data_augmentation=False, lr=lr) for lr in (1e-5, 5e-4, 2e-4, 1e-4, 5e-3) ] - logging.info(f"Run `tensorboard --logdir={reporter.report_dir}` for realtime logs") + logging.info(f"Run `tensorboard --logdir={report_dir}` for realtime logs") - reporter.report([random_pipeline, *cnn_pipelines, *vgg16_pipelines]) + _benchmarker.benchmark( + [random_pipeline,] + ) diff --git a/robots-at-robots/research/robots_at_robots/armor_digit/clean_datasets.py b/robots-at-robots/research/robots_at_robots/armor_digit/clean_datasets.py index 943d412fa18d2fc5f17e433920078c948c54e100..394a1c460f442ce985380f5a6fa181d763e9924d 100644 --- a/robots-at-robots/research/robots_at_robots/armor_digit/clean_datasets.py +++ b/robots-at-robots/research/robots_at_robots/armor_digit/clean_datasets.py @@ -18,7 +18,18 @@ if __name__ == "__main__": _armor_digit_dataset = ( make_armor_digit_dataset_generator() .from_roco_dataset(_roco_dataset) - .skip((1009 - 117) + (1000 - 86) + (1000 - 121) + (1000 - 138) + (1000 - 137)) + .skip( + (1009 - 117) + + (1000 - 86) + + (1000 - 121) + + (1000 - 138) + + (1000 - 137) + + (1000 - 154) + + (1000 - 180) + + (1000 - 160) + + (1000 - 193) + + (1000 - 80) + ) .cap(1000) ) diff --git a/robots-at-robots/research/robots_at_robots/dataset/armor_value_dataset_generator.py b/robots-at-robots/research/robots_at_robots/dataset/armor_value_dataset_generator.py index 7b4ce98cf3c8aff86cf9a0ce0705b5267801d27c..4aafd34e781d3ff32eac9a774a7da97e8b3fb448 100644 --- a/robots-at-robots/research/robots_at_robots/dataset/armor_value_dataset_generator.py +++ b/robots-at-robots/research/robots_at_robots/dataset/armor_value_dataset_generator.py @@ -5,6 +5,7 @@ from polystar.common.filters.exclude_filter import ExcludeFilter from polystar.common.filters.filter_abc import FilterABC from polystar.common.filters.pass_through_filter import PassThroughFilter from research.common.dataset.cleaning.dataset_changes import DatasetChanges +from research.common.datasets.image_dataset import FileImageDataset from research.common.datasets.image_file_dataset_builder import DirectoryDatasetBuilder from research.common.datasets.lazy_dataset import TargetT from research.common.datasets.roco.roco_dataset_builder import ROCODatasetBuilder @@ -28,12 +29,13 @@ class ArmorValueDatasetGenerator(Generic[TargetT]): self.task_name = task_name self.targets_filter = targets_filter or PassThroughFilter() - def from_roco_datasets(self, roco_datasets: List[ROCODatasetBuilder]) -> List[DirectoryDatasetBuilder[TargetT]]: - return [self.from_roco_dataset(roco_dataset) for roco_dataset in roco_datasets] + # FIXME signature inconsistency across methods + def from_roco_datasets(self, roco_datasets: List[ROCODatasetBuilder]) -> List[FileImageDataset[TargetT]]: + return [self.from_roco_dataset(roco_dataset).to_file_images().build() for roco_dataset in roco_datasets] def from_roco_dataset(self, roco_dataset_builder: ROCODatasetBuilder) -> DirectoryDatasetBuilder[TargetT]: cache_dir = roco_dataset_builder.main_dir / self.task_name - dataset_name = f"{roco_dataset_builder.name}_armor_{self.task_name}" + dataset_name = roco_dataset_builder.name ArmorValueDatasetCache(roco_dataset_builder, cache_dir, dataset_name, self.target_factory).generate_if_needed() diff --git a/robots-at-robots/research/robots_at_robots/demos/demo_pipeline.py b/robots-at-robots/research/robots_at_robots/demos/demo_pipeline.py index 0212bf8852c52a1f557b2673384dd5621f23072e..c3a4d34ac4b40d71ae7b4214450be2a5137a0be7 100644 --- a/robots-at-robots/research/robots_at_robots/demos/demo_pipeline.py +++ b/robots-at-robots/research/robots_at_robots/demos/demo_pipeline.py @@ -16,7 +16,7 @@ from polystar.common.utils.tensorflow import patch_tf_v2 from polystar.common.view.plt_results_viewer import PltResultViewer from polystar.robots_at_robots.dependency_injection import make_injector from research.common.datasets.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo -from research.robots_at_robots.armor_color.baseline_experiments import ( +from research.robots_at_robots.armor_color.benchmark import ( ArmorColorPipeline, MeanChannels, RedBlueComparisonClassifier, diff --git a/robots-at-robots/research/robots_at_robots/evaluation/benchmark.py b/robots-at-robots/research/robots_at_robots/evaluation/benchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..045b13dc5da816a3ba4dfde5a42bf20c939b42eb --- /dev/null +++ b/robots-at-robots/research/robots_at_robots/evaluation/benchmark.py @@ -0,0 +1,49 @@ +from dataclasses import dataclass +from typing import List + +from polystar.common.pipeline.classification.classification_pipeline import ClassificationPipeline +from research.common.datasets.image_dataset import FileImageDataset +from research.common.datasets.roco.roco_dataset_builder import ROCODatasetBuilder +from research.robots_at_robots.dataset.armor_value_dataset_generator import ArmorValueDatasetGenerator +from research.robots_at_robots.evaluation.image_pipeline_evaluation_reporter import ImagePipelineEvaluationReporter +from research.robots_at_robots.evaluation.image_pipeline_evaluator import ImageClassificationPipelineEvaluator +from research.robots_at_robots.evaluation.metrics.f1 import F1Metric +from research.robots_at_robots.evaluation.trainer import ImageClassificationPipelineTrainer + + +@dataclass +class Benchmarker: + def __init__( + self, + train_datasets: List[FileImageDataset], + test_datasets: List[FileImageDataset], + evaluation_project: str, + experiment_name: str, + classes: List, + ): + self.trainer = ImageClassificationPipelineTrainer(train_datasets) + self.evaluator = ImageClassificationPipelineEvaluator(train_datasets, test_datasets) + self.reporter = ImagePipelineEvaluationReporter( + evaluation_project, experiment_name, classes, other_metrics=[F1Metric()] + ) + + def benchmark(self, pipelines: List[ClassificationPipeline]): + self.trainer.train_pipelines(pipelines) + self.reporter.report(self.evaluator.evaluate_pipelines(pipelines)) + + +def make_armor_value_benchmarker( + train_roco_datasets: List[ROCODatasetBuilder], + test_roco_datasets: List[ROCODatasetBuilder], + evaluation_project: str, + experiment_name: str, + dataset_generator: ArmorValueDatasetGenerator, + classes: List, +): + return Benchmarker( + dataset_generator.from_roco_datasets(train_roco_datasets), + dataset_generator.from_roco_datasets(test_roco_datasets), + evaluation_project=evaluation_project, + experiment_name=experiment_name, + classes=classes, + ) diff --git a/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluation_reporter.py b/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluation_reporter.py index 6cd66e57c0b66c116093d92276dedba7edafa84d..72996a9e2517ff298eca0dda612c3916fe8c55e3 100644 --- a/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluation_reporter.py +++ b/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluation_reporter.py @@ -1,10 +1,8 @@ from collections import Counter -from dataclasses import dataclass, field -from enum import Enum +from dataclasses import InitVar, dataclass, field from math import log from os.path import relpath -from pathlib import Path -from typing import Dict, Generic, Iterable, List, Optional, Tuple +from typing import Generic, List, Optional, Tuple import matplotlib.pyplot as plt import numpy as np @@ -12,189 +10,233 @@ import seaborn as sns from matplotlib.axes import Axes, logging from matplotlib.figure import Figure from pandas import DataFrame +from sklearn.metrics import classification_report, confusion_matrix from polystar.common.pipeline.classification.classification_pipeline import EnumT -from polystar.common.pipeline.pipeline import Pipeline from polystar.common.utils.dataframe import Format, format_df_row, format_df_rows, make_formater from polystar.common.utils.markdown import MarkdownFile from polystar.common.utils.time import create_time_id from research.common.constants import DSET_DIR, EVALUATION_DIR -from research.common.datasets.roco.roco_dataset_builder import ROCODatasetBuilder -from research.robots_at_robots.evaluation.image_pipeline_evaluator import ( - ClassificationResults, - ImagePipelineEvaluator, - SetClassificationResults, -) - - -class Metric(Enum): - F1_WEIGHTED_AVG = ("f1-score", "weighted avg") - ACCURACY = ("precision", "accuracy") - - def __str__(self): - if self.value[1] == "accuracy": - return "accuracy" - return " ".join(self.value) - - def __getitem__(self, item): - return self.value[item] +from research.robots_at_robots.evaluation.metrics.accuracy import AccuracyMetric +from research.robots_at_robots.evaluation.metrics.metric_abc import MetricABC +from research.robots_at_robots.evaluation.performance import ClassificationPerformance, ClassificationPerformances +from research.robots_at_robots.evaluation.set import Set @dataclass class ImagePipelineEvaluationReporter(Generic[EnumT]): - evaluator: ImagePipelineEvaluator[EnumT] evaluation_project: str experiment_name: str - main_metric: Metric = Metric.F1_WEIGHTED_AVG - other_metrics: List[Metric] = field(default_factory=lambda: [Metric.ACCURACY]) + classes: List[EnumT] + main_metric: MetricABC = field(default_factory=AccuracyMetric) + other_metrics: InitVar[List[MetricABC]] = None + _mf: MarkdownFile = field(init=False) + _performances: ClassificationPerformances = field(init=False) - def __post_init__(self): + def __post_init__(self, other_metrics: List[MetricABC]): self.report_dir = EVALUATION_DIR / self.evaluation_project / f"{create_time_id()}_{self.experiment_name}" + self.all_metrics: List[MetricABC] = [self.main_metric] + (other_metrics or []) - def report(self, pipelines: Iterable[Pipeline]): - logging.info(f"Running experiment {self.experiment_name}") - - pipeline2results = self.evaluator.evaluate_pipelines(pipelines) + def report(self, performances: ClassificationPerformances): + sns.set() + self._performances = performances + with MarkdownFile(self.report_dir / "report.md") as self._mf: - with MarkdownFile(self.report_dir / "report.md") as mf: - mf.title(f"Evaluation report") - self._report_datasets(mf) - self._report_aggregated_results(mf, pipeline2results, self.report_dir) - self._report_pipelines_results(mf, pipeline2results) + self._mf.title(f"Evaluation report") + self._report_datasets() + self._report_aggregated_results() + self._report_pipelines_results() logging.info(f"Report generated at file:///{self.report_dir/'report.md'}") - def _report_datasets(self, mf: MarkdownFile): - mf.title("Datasets", level=2) + def _report_datasets(self): + self._mf.title("Datasets", level=2) - mf.title("Training", level=3) - self._report_dataset( - mf, self.evaluator.train_roco_datasets, self.evaluator.train_dataset_sizes, self.evaluator.train_labels - ) + self._mf.title("Training", level=3) + self._report_dataset(self._performances.train) - mf.title("Testing", level=3) - self._report_dataset( - mf, self.evaluator.test_roco_datasets, self.evaluator.test_dataset_sizes, self.evaluator.test_labels - ) + self._mf.title("Testing", level=3) + self._report_dataset(self._performances.test) - @staticmethod - def _report_dataset( - mf: MarkdownFile, roco_datasets: List[ROCODatasetBuilder], dataset_sizes: List[int], labels: List[EnumT] - ): - total = len(labels) - labels = [str(label) for label in labels] - mf.paragraph(f"{total} images") + def _report_dataset(self, performances: ClassificationPerformances): df = ( - DataFrame( - { - dataset.name: Counter(labels[start:end]) - for dataset, start, end in zip( - roco_datasets, np.cumsum([0] + dataset_sizes), np.cumsum(dataset_sizes) - ) - } - ) + DataFrame({perf.dataset_name: Counter(perf.labels) for perf in performances}) .fillna(0) .sort_index() + .astype(int) ) - df["Total"] = sum([df[d.name] for d in roco_datasets]) - df["Repartition"] = (df["Total"] / total).map("{:.1%}".format) - mf.table(df) - - def _report_aggregated_results( - self, mf: MarkdownFile, pipeline2results: Dict[str, ClassificationResults[EnumT]], report_dir: Path - ): - fig_scores, fig_times, aggregated_results = self._aggregate_results(pipeline2results) - aggregated_scores_image_name = "aggregated_scores.png" - fig_scores.savefig(report_dir / aggregated_scores_image_name) - aggregated_times_image_name = "aggregated_times.png" - fig_times.savefig(report_dir / aggregated_times_image_name) - - mf.title("Aggregated results", level=2) - mf.image(aggregated_scores_image_name) - mf.image(aggregated_times_image_name) - mf.paragraph("On test set:") - mf.table(aggregated_results[aggregated_results["set"] == "test"].drop(columns="set")) - mf.paragraph("On train set:") - mf.table(aggregated_results[aggregated_results["set"] == "train"].drop(columns="set")) - - def _report_pipelines_results(self, mf: MarkdownFile, pipeline2results: Dict[str, ClassificationResults[EnumT]]): - for pipeline_name, results in sorted( - pipeline2results.items(), - key=lambda name_results: name_results[1].test_results.report[self.main_metric[1]][self.main_metric[0]], + df["Total"] = df.sum(axis=1) + df["Repartition"] = df["Total"] / df["Total"].sum() + df.loc["Total"] = df.sum() + df.loc["Repartition"] = df.loc["Total"] / df["Total"]["Total"] + dset_repartition = df.loc["Repartition"].map("{:.1%}".format) + df["Repartition"] = df["Repartition"].map("{:.1%}".format) + df.loc["Repartition"] = dset_repartition + df.at["Total", "Repartition"] = "" + df.at["Repartition", "Repartition"] = "" + df.at["Repartition", "Total"] = "" + self._mf.table(df) + + def _report_aggregated_results(self): + fig_scores, fig_times = self._make_aggregate_figures() + + self._mf.title("Aggregated results", level=2) + self._mf.figure(fig_scores, "aggregated_scores.png") + self._mf.figure(fig_times, "aggregated_times.png") + + self._mf.paragraph("On test set:") + self._mf.table(self._make_aggregated_results_for_set(Set.TRAIN)) + self._mf.paragraph("On train set:") + self._mf.table(self._make_aggregated_results_for_set(Set.TEST)) + + def _report_pipelines_results(self): + for pipeline_name, performances in sorted( + self._performances.group_by_pipeline().items(), + key=lambda name_perfs: self.main_metric(name_perfs[1].test.merge()), reverse=True, ): - self._report_pipeline_results(mf, pipeline_name, results) + self._report_pipeline_results(pipeline_name, performances) - def _report_pipeline_results(self, mf: MarkdownFile, pipeline_name: str, results: ClassificationResults[EnumT]): - mf.title(pipeline_name, level=2) + def _report_pipeline_results(self, pipeline_name: str, performances: ClassificationPerformances): + self._mf.title(pipeline_name, level=2) - mf.paragraph(results.full_pipeline_name) + self._mf.title("Train results", level=3) + self._report_pipeline_set_results(performances, Set.TRAIN) - mf.title("Train results", level=3) - ImagePipelineEvaluationReporter._report_pipeline_set_results( - mf, results.train_results, self.evaluator.train_images_paths - ) + self._mf.title("Test results", level=3) + self._report_pipeline_set_results(performances, Set.TEST) - mf.title("Test results", level=3) - ImagePipelineEvaluationReporter._report_pipeline_set_results( - mf, results.test_results, self.evaluator.test_images_paths - ) + def _report_pipeline_set_results(self, performances: ClassificationPerformances, set_: Set): + performances = performances.on_set(set_) + perf = performances.merge() + + self._mf.title("Metrics", level=4) + self._report_pipeline_set_metrics(performances, perf, set_) - @staticmethod - def _report_pipeline_set_results( - mf: MarkdownFile, results: SetClassificationResults[EnumT], image_paths: List[Path] + self._mf.title("Confusion Matrix:", level=4) + self._report_pipeline_set_confusion_matrix(perf) + + self._mf.title("25 Mistakes examples", level=4) + self._report_pipeline_set_mistakes(perf) + + def _report_pipeline_set_metrics( + self, performances: ClassificationPerformances, perf: ClassificationPerformance, set_: Set ): - mf.title("Metrics", level=4) - mf.paragraph(f"Inference time: {results.mean_inference_time: .2e} s/img") - df = DataFrame(results.report) + fig: Figure = plt.figure(figsize=(9, 6)) + ax: Axes = fig.subplots() + sns.barplot( + data=DataFrame( + [ + {"dataset": performance.dataset_name, "score": metric(performance), "metric": metric.name} + for performance in performances + for metric in self.all_metrics + ] + + [ + {"dataset": performance.dataset_name, "score": len(performance) / len(perf), "metric": "support"} + for performance in performances + ] + ), + x="dataset", + hue="metric", + y="score", + ax=ax, + ) + ax.set_xticklabels(ax.get_xticklabels(), rotation=30, ha="right") + pipeline_name = performances.performances[0].pipeline_name + fig.suptitle(f"{pipeline_name} performance across {set_} datasets") + _format_ax(ax, "{:.1%}", limits=(0, 1)) + fig.tight_layout() + self._mf.figure(fig, f"{pipeline_name}_{set_}.png") + + self._mf.paragraph(f"Inference time: {perf.mean_inference_time: .2e} s/img") + df = DataFrame(classification_report(perf.labels, perf.predictions, output_dict=True)) format_df_rows(df, ["precision", "recall", "f1-score"], "{:.1%}") format_df_row(df, "support", int) - mf.table(df) - mf.title("Confusion Matrix:", level=4) - mf.table(DataFrame(results.confusion_matrix, index=results.unique_labels, columns=results.unique_labels)) - mf.title("25 Mistakes examples", level=4) - mistakes_idx = np.random.choice(results.mistakes, min(len(results.mistakes), 25), replace=False) + self._mf.table(df) + + def _report_pipeline_set_confusion_matrix(self, perf: ClassificationPerformance): + self._mf.table( + DataFrame( + confusion_matrix(perf.labels, perf.predictions), index=perf.unique_labels, columns=perf.unique_labels + ) + ) + + def _report_pipeline_set_mistakes(self, perf: ClassificationPerformance): + mistakes = perf.mistakes + mistakes_idx = np.random.choice(mistakes, min(len(mistakes), 25), replace=False) relative_paths = [ - f", str(mf.markdown_path.parent))})" for idx in mistakes_idx + f", str(self._mf.markdown_path.parent))})" + for idx in mistakes_idx + ] + images_names = [ + f"[{perf.examples[idx].path.relative_to(DSET_DIR)}]" + f"({relpath(str(perf.examples[idx].path), str(self._mf.markdown_path.parent))})" + for idx in mistakes_idx ] - images_names = [image_paths[idx].relative_to(DSET_DIR) for idx in mistakes_idx] - mf.table( + self._mf.table( DataFrame( { "images": relative_paths, - "labels": map(str, results.labels[mistakes_idx]), - "predictions": map(str, results.predictions[mistakes_idx]), + "labels": perf.labels[mistakes_idx], + "predictions": perf.predictions[mistakes_idx], + **{ + f"p({str(label)})": map("{:.1%}".format, perf.proba[mistakes_idx, i]) + for i, label in enumerate(self.classes) + }, "image names": images_names, } ).set_index("images") ) - def _aggregate_results( - self, pipeline2results: Dict[str, ClassificationResults[EnumT]] - ) -> Tuple[Figure, Figure, DataFrame]: - sns.set_style() - sets = ["train", "test"] + def _make_aggregate_figures(self) -> Tuple[Figure, Figure]: df = DataFrame.from_records( [ { - "pipeline": pipeline_name, - str(self.main_metric): results.on_set(set_).report[self.main_metric[1]][self.main_metric[0]], - "inference time": results.on_set(set_).mean_inference_time, - "set": set_, + "dataset": perf.dataset_name, + "pipeline": perf.pipeline_name, + self.main_metric.name: self.main_metric(perf), + "time": perf.mean_inference_time, + "set": perf.set_.name.lower(), + "support": len(perf), } - for pipeline_name, results in pipeline2results.items() - # for metric in [self.main_metric] # + self.other_metrics - for set_ in sets + for perf in self._performances ] - ).sort_values(["set", str(self.main_metric)], ascending=[True, False]) + ).sort_values(["set", self.main_metric.name], ascending=[True, False]) + df[f"{self.main_metric.name} "] = list(zip(df[self.main_metric.name], df.support)) + df["time "] = list(zip(df[self.main_metric.name], df.support)) + + return ( + _cat_pipeline_results(df, f"{self.main_metric.name} ", "{:.1%}", limits=(0, 1)), + _cat_pipeline_results(df, "time ", "{:.2e}", log_scale=True), + ) + + def _make_aggregated_results_for_set(self, set_: Set) -> DataFrame: + pipeline2performances = self._performances.on_set(set_).group_by_pipeline() + pipeline2performance = { + pipeline_name: performances.merge() for pipeline_name, performances in pipeline2performances.items() + } return ( - _cat_pipeline_results(df, str(self.main_metric), "{:.1%}", limits=(0, 1)), - _cat_pipeline_results(df, "inference time", "{:.2e}", log_scale=True), - df.set_index("pipeline"), + DataFrame( + [ + { + "pipeline": pipeline_name, + self.main_metric.name: self.main_metric(performance), + "inference time": performance.mean_inference_time, + } + for pipeline_name, performance in pipeline2performance.items() + ] + ) + .set_index("pipeline") + .sort_values(self.main_metric.name, ascending=False) ) +def weighted_mean(x, **kws): + val, weight = map(np.asarray, zip(*x)) + return (val * weight).sum() / weight.sum() + + def _cat_pipeline_results( df: DataFrame, y: str, fmt: str, limits: Optional[Tuple[float, float]] = None, log_scale: bool = False ) -> Figure: @@ -208,6 +250,8 @@ def _cat_pipeline_results( legend=False, col_order=["test", "train"], height=10, + estimator=weighted_mean, + orient="v", ) grid.set_xticklabels(rotation=30, ha="right") diff --git a/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluator.py b/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluator.py index 2a2370602aaf2b60f7df81bd872808fedf8a9043..266de795dd14914537cade66ee3f0cd560aaf039 100644 --- a/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluator.py +++ b/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluator.py @@ -1,111 +1,57 @@ -import logging -from dataclasses import dataclass from enum import Enum -from pathlib import Path +from itertools import chain from time import time -from typing import Dict, Generic, Iterable, List, Sequence, Tuple +from typing import Generic, Iterable, List import numpy as np -from memoized_property import memoized_property -from sklearn.metrics import classification_report, confusion_matrix -from tqdm import tqdm -from polystar.common.models.image import Image, load_images -from polystar.common.pipeline.pipeline import Pipeline +from polystar.common.models.image import file_images_to_images +from polystar.common.pipeline.classification.classification_pipeline import ClassificationPipeline +from polystar.common.utils.iterable_utils import flatten +from research.common.datasets.image_dataset import FileImageDataset from research.common.datasets.lazy_dataset import TargetT -from research.common.datasets.roco.roco_dataset_builder import ROCODatasetBuilder -from research.common.datasets.union_dataset import UnionDataset -from research.robots_at_robots.dataset.armor_value_dataset_generator import ArmorValueDatasetGenerator +from research.robots_at_robots.evaluation.performance import ( + ClassificationPerformance, + ClassificationPerformances, + ContextualizedClassificationPerformance, +) +from research.robots_at_robots.evaluation.set import Set -@dataclass -class SetClassificationResults(Generic[TargetT]): - labels: np.ndarray - predictions: np.ndarray - mean_inference_time: float - - @property - def report(self) -> Dict: - return classification_report(self.labels, self.predictions, output_dict=True) - - @property - def confusion_matrix(self) -> Dict: - return confusion_matrix(self.labels, self.predictions) - - @property - def mistakes(self) -> Sequence[int]: - return np.where(self.labels != self.predictions)[0] - - @memoized_property - def unique_labels(self) -> List[TargetT]: - return sorted(set(self.labels) | set(self.predictions)) - - -@dataclass -class ClassificationResults(Generic[TargetT]): - train_results: SetClassificationResults[TargetT] - test_results: SetClassificationResults[TargetT] - full_pipeline_name: str - - def on_set(self, set_: str) -> SetClassificationResults[TargetT]: - if set_ is "train": - return self.train_results - return self.test_results - - -class ImagePipelineEvaluator(Generic[TargetT]): +class ImageClassificationPipelineEvaluator(Generic[TargetT]): def __init__( - self, - train_roco_datasets: List[ROCODatasetBuilder], - test_roco_datasets: List[ROCODatasetBuilder], - image_dataset_generator: ArmorValueDatasetGenerator[TargetT], + self, train_datasets: List[FileImageDataset], test_datasets: List[FileImageDataset], ): - logging.info("Loading data") - self.train_roco_datasets = train_roco_datasets - self.test_roco_datasets = test_roco_datasets - (self.train_images_paths, self.train_images, self.train_labels, self.train_dataset_sizes) = load_datasets( - train_roco_datasets, image_dataset_generator - ) - (self.test_images_paths, self.test_images, self.test_labels, self.test_dataset_sizes) = load_datasets( - test_roco_datasets, image_dataset_generator - ) - - def evaluate_pipelines(self, pipelines: Iterable[Pipeline]) -> Dict[str, ClassificationResults]: - tqdm_pipelines = tqdm(pipelines, desc="Training", unit="pipeline") - return {str(pipeline): self.evaluate_pipeline(pipeline, tqdm_pipelines) for pipeline in tqdm_pipelines} + self.train_datasets = train_datasets + self.test_datasets = test_datasets - def evaluate_pipeline(self, pipeline: Pipeline, tqdm_pipelines: tqdm) -> ClassificationResults: - tqdm_pipelines.set_postfix({"pipeline": pipeline.name}, True) - pipeline.fit(self.train_images, self.train_labels) + def evaluate_pipelines(self, pipelines: Iterable[ClassificationPipeline]) -> ClassificationPerformances: + return ClassificationPerformances(flatten(self._evaluate_pipeline(pipeline) for pipeline in pipelines)) - train_results = self._evaluate_pipeline_on_set(pipeline, self.train_images, self.train_labels) - test_results = self._evaluate_pipeline_on_set(pipeline, self.test_images, self.test_labels) - - return ClassificationResults( - train_results=train_results, test_results=test_results, full_pipeline_name=repr(pipeline), + def _evaluate_pipeline(self, pipeline: ClassificationPipeline) -> Iterable[ContextualizedClassificationPerformance]: + return chain( + self._evaluate_pipeline_on_set(pipeline, self.train_datasets, Set.TRAIN), + self._evaluate_pipeline_on_set(pipeline, self.test_datasets, Set.TEST), ) @staticmethod def _evaluate_pipeline_on_set( - pipeline: Pipeline, images: List[Image], labels: List[TargetT] - ) -> SetClassificationResults: - t = time() - preds = pipeline.predict(images) - mean_time = (time() - t) / len(images) - return SetClassificationResults(_labels_to_numpy(labels), _labels_to_numpy(preds), mean_time) - - -def load_datasets( - roco_datasets: List[ROCODatasetBuilder], image_dataset_generator: ArmorValueDatasetGenerator[TargetT], -) -> Tuple[List[Path], List[Image], List[TargetT], List[int]]: - # TODO we should receive a list of FileImageDataset - datasets = [builder.build() for builder in image_dataset_generator.from_roco_datasets(roco_datasets)] - dataset_sizes = [len(d) for d in datasets] - - dataset = UnionDataset(datasets) - paths, targets = list(dataset.examples), list(dataset.targets) - images = list(load_images(paths)) - return paths, images, targets, dataset_sizes + pipeline: ClassificationPipeline, datasets: List[FileImageDataset], set_: Set + ) -> Iterable[ContextualizedClassificationPerformance]: + for dataset in datasets: + t = time() + proba, classes = pipeline.predict_proba_and_classes(file_images_to_images(dataset.examples)) + mean_time = (time() - t) / len(dataset) + yield ContextualizedClassificationPerformance( + examples=dataset.examples, + labels=_labels_to_numpy(dataset.targets), + predictions=_labels_to_numpy(classes), + proba=proba, + mean_inference_time=mean_time, + set_=set_, + dataset_name=dataset.name, + pipeline_name=pipeline.name, + ) def _labels_to_numpy(labels: List[Enum]) -> np.ndarray: diff --git a/robots-at-robots/research/robots_at_robots/evaluation/metrics/__init__.py b/robots-at-robots/research/robots_at_robots/evaluation/metrics/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/robots-at-robots/research/robots_at_robots/evaluation/metrics/accuracy.py b/robots-at-robots/research/robots_at_robots/evaluation/metrics/accuracy.py new file mode 100644 index 0000000000000000000000000000000000000000..ccfe9c73bdda26c7c0624fa3220d30e335a76506 --- /dev/null +++ b/robots-at-robots/research/robots_at_robots/evaluation/metrics/accuracy.py @@ -0,0 +1,11 @@ +from research.robots_at_robots.evaluation.metrics.metric_abc import MetricABC +from research.robots_at_robots.evaluation.performance import ClassificationPerformance + + +class AccuracyMetric(MetricABC): + def __call__(self, performance: ClassificationPerformance) -> float: + return (performance.labels == performance.predictions).mean() + + @property + def name(self) -> str: + return "accuracy" diff --git a/robots-at-robots/research/robots_at_robots/evaluation/metrics/f1.py b/robots-at-robots/research/robots_at_robots/evaluation/metrics/f1.py new file mode 100644 index 0000000000000000000000000000000000000000..dd5f48ae0202e7b917f58e7d6f94b1713de2caff --- /dev/null +++ b/robots-at-robots/research/robots_at_robots/evaluation/metrics/f1.py @@ -0,0 +1,30 @@ +from enum import Enum, auto + +from sklearn.metrics import f1_score + +from research.robots_at_robots.evaluation.metrics.metric_abc import MetricABC +from research.robots_at_robots.evaluation.performance import ClassificationPerformance + + +class F1Strategy(Enum): + MICRO = auto() + MACRO = auto() + SAMPLES = auto() + WEIGHTED = auto() + + def __repr__(self): + return self.name.lower() + + __str__ = __repr__ + + +class F1Metric(MetricABC): + def __init__(self, strategy: F1Strategy = F1Strategy.MACRO): + self.strategy = strategy + + def __call__(self, performance: ClassificationPerformance) -> float: + return f1_score(performance.labels, performance.predictions, average=str(self.strategy)) + + @property + def name(self) -> str: + return f"f1 {self.strategy}" diff --git a/robots-at-robots/research/robots_at_robots/evaluation/metrics/metric_abc.py b/robots-at-robots/research/robots_at_robots/evaluation/metrics/metric_abc.py new file mode 100644 index 0000000000000000000000000000000000000000..f25a0c3f122a311d3495e74a5d02a3d9eff224e2 --- /dev/null +++ b/robots-at-robots/research/robots_at_robots/evaluation/metrics/metric_abc.py @@ -0,0 +1,17 @@ +from abc import ABC, abstractmethod + +from research.robots_at_robots.evaluation.performance import ClassificationPerformance + + +class MetricABC(ABC): + @abstractmethod + def __call__(self, performance: ClassificationPerformance) -> float: + pass + + @property + @abstractmethod + def name(self) -> str: + pass + + def __repr__(self): + return self.name diff --git a/robots-at-robots/research/robots_at_robots/evaluation/performance.py b/robots-at-robots/research/robots_at_robots/evaluation/performance.py new file mode 100644 index 0000000000000000000000000000000000000000..33c0bc765a301b1bbe3c956948fa3351052cce6b --- /dev/null +++ b/robots-at-robots/research/robots_at_robots/evaluation/performance.py @@ -0,0 +1,79 @@ +from dataclasses import dataclass +from typing import Dict, Iterable, List, Sequence + +import numpy as np +from memoized_property import memoized_property + +from polystar.common.filters.filter_abc import FilterABC +from polystar.common.models.image import FileImage +from polystar.common.utils.iterable_utils import flatten, group_by +from research.robots_at_robots.evaluation.set import Set + + +@dataclass +class ClassificationPerformance: + examples: List[FileImage] + labels: np.ndarray + predictions: np.ndarray + proba: np.ndarray + mean_inference_time: float + + @property + def mistakes(self) -> Sequence[int]: + return np.where(self.labels != self.predictions)[0] + + @memoized_property + def unique_labels(self): + return sorted(set(self.labels) | set(self.predictions)) + + def __len__(self) -> int: + return len(self.labels) + + +@dataclass +class ContextualizedClassificationPerformance(ClassificationPerformance): + set_: Set + dataset_name: str + pipeline_name: str + + +@dataclass +class ClassificationPerformances(Iterable[ContextualizedClassificationPerformance]): + performances: List[ContextualizedClassificationPerformance] + + @property + def train(self) -> "ClassificationPerformances": + return self.on_set(Set.TRAIN) + + @property + def test(self) -> "ClassificationPerformances": + return self.on_set(Set.TEST) + + def on_set(self, set_: Set) -> "ClassificationPerformances": + return ClassificationPerformances(SetClassificationPerformanceFilter(set_).filter(self.performances)) + + def group_by_pipeline(self) -> Dict[str, "ClassificationPerformances"]: + return { + name: ClassificationPerformances(performances) + for name, performances in group_by(self, lambda p: p.pipeline_name).items() + } + + def merge(self) -> ClassificationPerformance: + return ClassificationPerformance( + examples=flatten(p.examples for p in self), + labels=np.concatenate([p.labels for p in self]), + predictions=np.concatenate([p.predictions for p in self]), + proba=np.concatenate([p.proba for p in self]), + mean_inference_time=np.average([p.mean_inference_time for p in self], weights=[len(p) for p in self]), + ) + + def __iter__(self): + return iter(self.performances) + + +@dataclass +class SetClassificationPerformanceFilter(FilterABC[ContextualizedClassificationPerformance]): + set_: Set + + def validate_single(self, perf: ContextualizedClassificationPerformance) -> bool: + return perf.set_ is self.set_ diff --git a/robots-at-robots/research/robots_at_robots/evaluation/set.py b/robots-at-robots/research/robots_at_robots/evaluation/set.py new file mode 100644 index 0000000000000000000000000000000000000000..6175a68587d575e3d18cacf456ab99da45925220 --- /dev/null +++ b/robots-at-robots/research/robots_at_robots/evaluation/set.py @@ -0,0 +1,14 @@ +from dataclasses import dataclass +from enum import Enum, auto + + +@dataclass +class Set(Enum): + TRAIN = auto() + VALIDATION = auto() + TEST = auto() + + def __repr__(self): + return self.name.lower() + + __str__ = __repr__ diff --git a/robots-at-robots/research/robots_at_robots/evaluation/trainer.py b/robots-at-robots/research/robots_at_robots/evaluation/trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..6731cd00fd39b312d7e878eecc55fbb4d85adfb6 --- /dev/null +++ b/robots-at-robots/research/robots_at_robots/evaluation/trainer.py @@ -0,0 +1,25 @@ +from typing import Generic, List + +from tqdm import tqdm + +from polystar.common.models.image import file_images_to_images +from polystar.common.pipeline.classification.classification_pipeline import ClassificationPipeline +from research.common.datasets.image_dataset import FileImageDataset +from research.common.datasets.lazy_dataset import TargetT +from research.common.datasets.union_dataset import UnionDataset + + +class ImageClassificationPipelineTrainer(Generic[TargetT]): + def __init__(self, training_datasets: List[FileImageDataset]): + train_dataset = UnionDataset(training_datasets) + self.images = file_images_to_images(train_dataset.examples) + self.labels = train_dataset.targets + + def train_pipeline(self, pipeline: ClassificationPipeline): + pipeline.fit(self.images, self.labels) + + def train_pipelines(self, pipelines: List[ClassificationPipeline]): + tqdm_pipelines = tqdm(pipelines, desc="Training Pipelines") + for pipeline in tqdm_pipelines: + tqdm_pipelines.set_postfix({"pipeline": pipeline.name}, True) + self.train_pipeline(pipeline)