diff --git a/common/polystar/common/pipeline/classification/classification_pipeline.py b/common/polystar/common/pipeline/classification/classification_pipeline.py
index 99c85f5000881185af3362c2856a856f06fcee71..56086c97eaa11a2d85369f04b1e2920fcda610dc 100644
--- a/common/polystar/common/pipeline/classification/classification_pipeline.py
+++ b/common/polystar/common/pipeline/classification/classification_pipeline.py
@@ -22,7 +22,9 @@ class ClassificationPipeline(Pipeline, Generic[IT, EnumT], ABC):
     def classifier(self) -> ClassifierABC:
         return self.steps[-1][-1]
 
-    def fit(self, x: Sequence[IT], y: List[EnumT], **fit_params):
+    def fit(self, x: Sequence[IT], y: List[EnumT], validation_size: int = 0, **fit_params):
+        if isinstance(self.classifier, ClassifierABC):
+            fit_params[f"{self.classifier.__class__.__name__}__validation_size"] = validation_size
         y_indices = _labels_to_indices(y)
         return super().fit(x, y_indices, **fit_params)
 
diff --git a/common/polystar/common/pipeline/classification/classifier_abc.py b/common/polystar/common/pipeline/classification/classifier_abc.py
index 64c89b96e323ee85e9309fd2ed757ac0d418de21..3016baf348e6d388ec4d5e608e893857cc75cbb7 100644
--- a/common/polystar/common/pipeline/classification/classifier_abc.py
+++ b/common/polystar/common/pipeline/classification/classifier_abc.py
@@ -10,7 +10,7 @@ from polystar.common.utils.named_mixin import NamedMixin
 class ClassifierABC(BaseEstimator, NamedMixin, Generic[IT], ABC):
     n_classes: int
 
-    def fit(self, examples: List[IT], label_indices: List[int]) -> "ClassifierABC":
+    def fit(self, examples: List[IT], label_indices: List[int], validation_size: int) -> "ClassifierABC":
         return self
 
     @abstractmethod
diff --git a/common/polystar/common/pipeline/classification/random_model.py b/common/polystar/common/pipeline/classification/random_model.py
index d6a13a9520b3e841b6912446d8305784d13ae789..9080f7afa45fd11b96af805348de02d6896e8257 100644
--- a/common/polystar/common/pipeline/classification/random_model.py
+++ b/common/polystar/common/pipeline/classification/random_model.py
@@ -11,7 +11,7 @@ class RandomClassifier(RuleBasedClassifierABC):
     def predict(self, examples: np.ndarray) -> List[int]:
         return choice(range(self.n_classes), size=len(examples), replace=True, p=self.weights_)
 
-    def fit(self, examples: List, label_indices: List[int]) -> "RandomClassifier":
+    def fit(self, examples: List, label_indices: List[int], validation_size: int) -> "RandomClassifier":
         indices2counts = Counter(label_indices)
         self.weights_ = [indices2counts[i] / len(label_indices) for i in range(self.n_classes)]
         return self
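The `ClassificationPipeline.fit` hunk above relies on scikit-learn's `step__param` fit-parameter routing, and on the pipeline's final step being registered under the classifier's class name (which this patch assumes via `self.classifier.__class__.__name__`). A minimal, self-contained sketch of that mechanism with a toy estimator:

```python
# Toy demonstration of sklearn's fit-param routing used by
# ClassificationPipeline.fit: a key of the form "<step>__<param>" is
# split on "__" and forwarded to that step's fit().
from sklearn.base import BaseEstimator
from sklearn.pipeline import Pipeline


class ToyClassifier(BaseEstimator):
    def fit(self, x, y, validation_size=0):
        print(f"validation_size={validation_size}")  # -> validation_size=10
        return self


# The step name must match the key prefix; the diff derives it from the
# classifier's class name, so "ToyClassifier" here.
pipe = Pipeline([("ToyClassifier", ToyClassifier())])
pipe.fit([[0], [1]], [0, 1], ToyClassifier__validation_size=10)
```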
diff --git a/common/polystar/common/utils/markdown.py b/common/polystar/common/utils/markdown.py
index 3997375130872f443774927e0585f9e2d76fc552..1aa5e5f24ce51ac7d3233def6f4c4412985d2248 100644
--- a/common/polystar/common/utils/markdown.py
+++ b/common/polystar/common/utils/markdown.py
@@ -33,7 +33,7 @@ class MarkdownFile:
         return self
 
     def image(self, relative_path: str, alt: str = "img") -> "MarkdownFile":
-        self.paragraph(f"![{alt}]({relative_path})")
+        self.paragraph(f"![{alt}]({relative_path.replace(' ', '%20')})")
         return self
 
     def figure(self, figure: Figure, name: str, alt: str = "img"):
diff --git a/robots-at-robots/research/robots_at_robots/armor_color/armor_color_benchmarker.py b/robots-at-robots/research/robots_at_robots/armor_color/armor_color_benchmarker.py
index a01bf0de068690b9dfb1582a668874aa95739365..37a9e35e4a2601bc651b4cb7665f0aea43be73f8 100644
--- a/robots-at-robots/research/robots_at_robots/armor_color/armor_color_benchmarker.py
+++ b/robots-at-robots/research/robots_at_robots/armor_color/armor_color_benchmarker.py
@@ -7,11 +7,15 @@ from research.robots_at_robots.evaluation.benchmark import make_armor_value_benc
 
 
 def make_armor_color_benchmarker(
-    train_roco_datasets: List[ROCODatasetBuilder], test_roco_datasets: List[ROCODatasetBuilder], experiment_name: str
+    train_roco_datasets: List[ROCODatasetBuilder],
+    validation_roco_datasets: List[ROCODatasetBuilder],
+    test_roco_datasets: List[ROCODatasetBuilder],
+    experiment_name: str,
 ):
     dataset_generator = make_armor_color_dataset_generator()
     return make_armor_value_benchmarker(
         train_roco_datasets=train_roco_datasets,
+        validation_roco_datasets=validation_roco_datasets,
         test_roco_datasets=test_roco_datasets,
         evaluation_project="armor-color",
         experiment_name=experiment_name,
diff --git a/robots-at-robots/research/robots_at_robots/armor_color/benchmark.py b/robots-at-robots/research/robots_at_robots/armor_color/benchmark.py
index 1ac6f2b9ce966d91659b005f6206fa35f80c7647..441fb0d709354c2190877c8b3d5a293071b98aad 100644
--- a/robots-at-robots/research/robots_at_robots/armor_color/benchmark.py
+++ b/robots-at-robots/research/robots_at_robots/armor_color/benchmark.py
@@ -37,19 +37,20 @@ if __name__ == "__main__":
     logging.getLogger().setLevel("INFO")
 
     _benchmarker = make_armor_color_benchmarker(
-        [
+        train_roco_datasets=[
             ROCODatasetsZoo.TWITCH.T470150052,
             ROCODatasetsZoo.TWITCH.T470152289,
             ROCODatasetsZoo.TWITCH.T470149568,
             ROCODatasetsZoo.TWITCH.T470151286,
         ],
-        [
+        validation_roco_datasets=[],
+        test_roco_datasets=[
             ROCODatasetsZoo.TWITCH.T470152838,
             ROCODatasetsZoo.TWITCH.T470153081,
             ROCODatasetsZoo.TWITCH.T470158483,
             ROCODatasetsZoo.TWITCH.T470152730,
         ],
-        "test",
+        experiment_name="test",
     )
 
     red_blue_comparison_pipeline = ArmorColorPipeline.from_pipes(
diff --git a/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_benchmarker.py b/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_benchmarker.py
index f4792c43adcec2192b74a2457662cae09d028681..d55d54a4202a2b3cc771a6dc6b64d972c25668a4 100644
--- a/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_benchmarker.py
+++ b/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_benchmarker.py
@@ -7,11 +7,15 @@ from research.robots_at_robots.evaluation.benchmark import make_armor_value_benc
 
 
 def make_armor_digit_benchmarker(
-    train_roco_datasets: List[ROCODatasetBuilder], test_roco_datasets: List[ROCODatasetBuilder], experiment_name: str
+    train_roco_datasets: List[ROCODatasetBuilder],
+    validation_roco_datasets: List[ROCODatasetBuilder],
+    test_roco_datasets: List[ROCODatasetBuilder],
+    experiment_name: str,
 ):
     dataset_generator = make_armor_digit_dataset_generator()
     return make_armor_value_benchmarker(
         train_roco_datasets=train_roco_datasets,
+        validation_roco_datasets=validation_roco_datasets,
        test_roco_datasets=test_roco_datasets,
         evaluation_project="armor-digit",
         experiment_name=experiment_name,
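Backing up to the `MarkdownFile.image` hunk at the top of this group: markdown image links break on raw spaces in the path, hence the percent-encoding. A standalone illustration (plain function, not the project's `MarkdownFile` API):

```python
# Standalone illustration of the percent-encoding done in MarkdownFile.image:
# a raw space inside "![alt](path)" would truncate the link in most renderers.
def image_line(relative_path: str, alt: str = "img") -> str:
    return f"![{alt}]({relative_path.replace(' ', '%20')})"


assert image_line("plots/confusion matrix.png", alt="cm") == "![cm](plots/confusion%20matrix.png)"
```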
diff --git a/robots-at-robots/research/robots_at_robots/armor_digit/benchmark.py b/robots-at-robots/research/robots_at_robots/armor_digit/benchmark.py
index 1b48d0ea6c6c17c276ad28d3e1687a2446f42a51..1c7a80f81c2449e352fc7795b6e09b6053f9448e 100644
--- a/robots-at-robots/research/robots_at_robots/armor_digit/benchmark.py
+++ b/robots-at-robots/research/robots_at_robots/armor_digit/benchmark.py
@@ -29,7 +29,8 @@ class ArmorDigitPipeline(ClassificationPipeline):
 
 
 class KerasClassifier(ClassifierABC):
-    def __init__(self, model: Model, optimizer, logs_dir: Path, with_data_augmentation: bool):
+    def __init__(self, model: Model, optimizer, logs_dir: Path, with_data_augmentation: bool, batch_size: int = 32):
+        self.batch_size = batch_size
         self.logs_dir = logs_dir
         self.with_data_augmentation = with_data_augmentation
         self.model = model
@@ -41,19 +42,17 @@ class KerasClassifier(ClassifierABC):
             return ImageDataGenerator()
         return ImageDataGenerator(rotation_range=45, zoom_range=[0.8, 1])  # brightness_range=[0.7, 1.4]
 
-    def fit(self, images: List[Image], labels: List[int]) -> "KerasClassifier":
-        n_val: int = 371  # FIXME
+    def fit(self, images: List[Image], labels: List[int], validation_size: int) -> "KerasClassifier":
         images = asarray(images)
         labels = to_categorical(asarray(labels), 5)  # FIXME
-        train_images, train_labels = images[:-n_val], labels[:-n_val]
-        val_images, val_labels = images[-n_val:], labels[-n_val:]
+        train_images, train_labels = images[: len(images) - validation_size], labels[: len(labels) - validation_size]
+        val_images, val_labels = images[len(images) - validation_size :], labels[len(labels) - validation_size :]
 
-        batch_size = 32  # FIXME
-        train_generator = self.train_data_gen.flow(train_images, train_labels, batch_size=batch_size, shuffle=True)
+        train_generator = self.train_data_gen.flow(train_images, train_labels, batch_size=self.batch_size, shuffle=True)
 
         self.model.fit(
             x=train_generator,
-            steps_per_epoch=len(train_images) / batch_size,
+            steps_per_epoch=len(train_images) / self.batch_size,
             validation_data=(val_images, val_labels),
             epochs=300,
             callbacks=[
@@ -102,7 +101,7 @@ def make_digits_cnn_pipeline(
 ) -> ArmorDigitPipeline:
     name = (
         f"cnn - ({input_size}) - lr {lr} - "
-        + " / ".join("_".join(map(str, sizes)) for sizes in conv_blocks)
+        + " ".join("_".join(map(str, sizes)) for sizes in conv_blocks)
         + (" - with_data_augm" * with_data_augmentation)
     )
     input_size = (input_size, input_size)
@@ -179,8 +178,8 @@ if __name__ == "__main__":
             ROCODatasetsZoo.TWITCH.T470150052,
             ROCODatasetsZoo.TWITCH.T470149568,
             ROCODatasetsZoo.TWITCH.T470151286,
-            ROCODatasetsZoo.TWITCH.T470152289,
         ],
+        validation_roco_datasets=[ROCODatasetsZoo.TWITCH.T470152289],
         test_roco_datasets=[
             ROCODatasetsZoo.TWITCH.T470152838,
             ROCODatasetsZoo.TWITCH.T470153081,
@@ -190,12 +189,12 @@ if __name__ == "__main__":
         experiment_name="test-benchmarker",
     )
 
-    random_pipeline = ArmorDigitPipeline.from_pipes([RandomClassifier()], name="random")
+    _report_dir = _benchmarker.reporter.report_dir
 
-    report_dir = _benchmarker.reporter.report_dir
-    cnn_pipelines = [
+    _random_pipeline = ArmorDigitPipeline.from_pipes([RandomClassifier()], name="random")
+    _cnn_pipelines = [
         make_digits_cnn_pipeline(
-            32, ((32, 32), (64, 64)), report_dir, with_data_augmentation=with_data_augmentation, lr=lr,
+            32, ((32, 32), (64, 64)), _report_dir, with_data_augmentation=with_data_augmentation, lr=lr,
         )
         for with_data_augmentation in [False]
         for lr in [2.5e-2, 1.6e-2, 1e-2, 6.3e-3, 4e-4]
@@ -209,12 +208,10 @@ if __name__ == "__main__":
     #     ]
 
     vgg16_pipelines = [
-        make_vgg16_pipeline(report_dir, input_size=32, with_data_augmentation=False, lr=lr)
+        make_vgg16_pipeline(_report_dir, input_size=32, with_data_augmentation=False, lr=lr)
         for lr in (1e-5, 5e-4, 2e-4, 1e-4, 5e-3)
     ]
 
-    logging.info(f"Run `tensorboard --logdir={report_dir}` for realtime logs")
+    logging.info(f"Run `tensorboard --logdir={_report_dir}` for realtime logs")
 
-    _benchmarker.benchmark(
-        [random_pipeline,]
-    )
+    _benchmarker.benchmark([_random_pipeline] + _cnn_pipelines)
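A self-contained toy run of the `KerasClassifier.fit` pattern above: the tail split driven by `validation_size`, then generator-based training. Tiny random data and a trivial model stand in for the project's armor images and CNNs:

```python
# Toy version of the fit pattern in KerasClassifier.fit above.
import numpy as np
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical

images = np.random.rand(100, 8, 8, 3).astype("float32")
labels = to_categorical(np.random.randint(0, 5, size=100), 5)
validation_size, batch_size = 20, 16

# Same split as above: the validation examples are the tail of the arrays
# (and the split stays correct when validation_size == 0).
train_x, train_y = images[: len(images) - validation_size], labels[: len(labels) - validation_size]
val_x, val_y = images[len(images) - validation_size :], labels[len(labels) - validation_size :]

model = Sequential([Flatten(input_shape=(8, 8, 3)), Dense(5, activation="softmax")])
model.compile(optimizer="adam", loss="categorical_crossentropy")

generator = ImageDataGenerator().flow(train_x, train_y, batch_size=batch_size, shuffle=True)
model.fit(x=generator, steps_per_epoch=len(train_x) // batch_size, validation_data=(val_x, val_y), epochs=1)
```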
diff --git a/robots-at-robots/research/robots_at_robots/evaluation/benchmark.py b/robots-at-robots/research/robots_at_robots/evaluation/benchmark.py
index 045b13dc5da816a3ba4dfde5a42bf20c939b42eb..b9cb996993c64e9dde6afe989cbdcc312148ef3c 100644
--- a/robots-at-robots/research/robots_at_robots/evaluation/benchmark.py
+++ b/robots-at-robots/research/robots_at_robots/evaluation/benchmark.py
@@ -16,13 +16,14 @@ class Benchmarker:
     def __init__(
         self,
         train_datasets: List[FileImageDataset],
+        validation_datasets: List[FileImageDataset],
         test_datasets: List[FileImageDataset],
         evaluation_project: str,
         experiment_name: str,
         classes: List,
     ):
-        self.trainer = ImageClassificationPipelineTrainer(train_datasets)
-        self.evaluator = ImageClassificationPipelineEvaluator(train_datasets, test_datasets)
+        self.trainer = ImageClassificationPipelineTrainer(train_datasets, validation_datasets)
+        self.evaluator = ImageClassificationPipelineEvaluator(train_datasets, validation_datasets, test_datasets)
         self.reporter = ImagePipelineEvaluationReporter(
             evaluation_project, experiment_name, classes, other_metrics=[F1Metric()]
         )
@@ -34,6 +35,7 @@ class Benchmarker:
 
 def make_armor_value_benchmarker(
     train_roco_datasets: List[ROCODatasetBuilder],
+    validation_roco_datasets: List[ROCODatasetBuilder],
     test_roco_datasets: List[ROCODatasetBuilder],
     evaluation_project: str,
     experiment_name: str,
@@ -41,8 +43,9 @@ def make_armor_value_benchmarker(
     classes: List,
 ):
     return Benchmarker(
-        dataset_generator.from_roco_datasets(train_roco_datasets),
-        dataset_generator.from_roco_datasets(test_roco_datasets),
+        train_datasets=dataset_generator.from_roco_datasets(train_roco_datasets),
+        validation_datasets=dataset_generator.from_roco_datasets(validation_roco_datasets),
+        test_datasets=dataset_generator.from_roco_datasets(test_roco_datasets),
         evaluation_project=evaluation_project,
         experiment_name=experiment_name,
         classes=classes,
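A toy sketch of the `Benchmarker` wiring above, with stub classes standing in for the project's trainer and evaluator: the same validation datasets feed both, one needing only their total size (for the tail split) and the other keeping them as a separate split.

```python
# Stub sketch (assumed toy names, not the project's classes) of the
# double role the validation datasets play in Benchmarker.__init__.
from typing import List


class ToyTrainer:
    def __init__(self, train: List[list], validation: List[list]):
        # Only the total validation count matters for the tail split.
        self.validation_size = sum(len(d) for d in validation)


class ToyEvaluator:
    def __init__(self, train: List[list], validation: List[list], test: List[list]):
        # The evaluator keeps the three splits apart for reporting.
        self.set2datasets = {"train": train, "validation": validation, "test": test}


validation = [["v1", "v2"]]
trainer = ToyTrainer([["a", "b"], ["c"]], validation)
evaluator = ToyEvaluator([["a", "b"], ["c"]], validation, [["t"]])
assert trainer.validation_size == 2
```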
diff --git a/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluation_reporter.py b/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluation_reporter.py
index 72996a9e2517ff298eca0dda612c3916fe8c55e3..bc36ef8a4e16f206d4b9b64c9eaaa7b6ad4463a1 100644
--- a/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluation_reporter.py
+++ b/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluation_reporter.py
@@ -52,8 +52,11 @@ class ImagePipelineEvaluationReporter(Generic[EnumT]):
     def _report_datasets(self):
         self._mf.title("Datasets", level=2)
 
-        self._mf.title("Training", level=3)
+        self._mf.title("Train-val", level=3)
+        self._mf.paragraph("Train")
         self._report_dataset(self._performances.train)
+        self._mf.paragraph("Val")
+        self._report_dataset(self._performances.validation)
 
         self._mf.title("Testing", level=3)
         self._report_dataset(self._performances.test)
@@ -85,9 +88,11 @@ class ImagePipelineEvaluationReporter(Generic[EnumT]):
         self._mf.figure(fig_times, "aggregated_times.png")
 
         self._mf.paragraph("On test set:")
-        self._mf.table(self._make_aggregated_results_for_set(Set.TRAIN))
-        self._mf.paragraph("On train set:")
         self._mf.table(self._make_aggregated_results_for_set(Set.TEST))
+        self._mf.paragraph("On validation set:")
+        self._mf.table(self._make_aggregated_results_for_set(Set.VALIDATION))
+        self._mf.paragraph("On train set:")
+        self._mf.table(self._make_aggregated_results_for_set(Set.TRAIN))
 
     def _report_pipelines_results(self):
         for pipeline_name, performances in sorted(
@@ -100,12 +105,15 @@
     def _report_pipeline_results(self, pipeline_name: str, performances: ClassificationPerformances):
         self._mf.title(pipeline_name, level=2)
 
-        self._mf.title("Train results", level=3)
-        self._report_pipeline_set_results(performances, Set.TRAIN)
-
         self._mf.title("Test results", level=3)
         self._report_pipeline_set_results(performances, Set.TEST)
 
+        self._mf.title("Validation results", level=3)
+        self._report_pipeline_set_results(performances, Set.VALIDATION)
+
+        self._mf.title("Train results", level=3)
+        self._report_pipeline_set_results(performances, Set.TRAIN)
+
     def _report_pipeline_set_results(self, performances: ClassificationPerformances, set_: Set):
         performances = performances.on_set(set_)
         perf = performances.merge()
@@ -204,7 +212,7 @@ class ImagePipelineEvaluationReporter(Generic[EnumT]):
         ).sort_values(["set", self.main_metric.name], ascending=[True, False])
 
         df[f"{self.main_metric.name} "] = list(zip(df[self.main_metric.name], df.support))
-        df["time "] = list(zip(df[self.main_metric.name], df.support))
+        df["time "] = list(zip(df.time, df.support))
 
         return (
             _cat_pipeline_results(df, f"{self.main_metric.name} ", "{:.1%}", limits=(0, 1)),
@@ -248,20 +256,19 @@ def _cat_pipeline_results(
         kind="bar",
         sharey=True,
         legend=False,
-        col_order=["test", "train"],
-        height=10,
+        col_order=["test", "validation", "train"],
+        height=8,
         estimator=weighted_mean,
         orient="v",
     )
 
-    grid.set_xticklabels(rotation=30, ha="right")
-
     fig: Figure = grid.fig
 
+    grid.set_xticklabels(rotation=30, ha="right")
+
     _format_axes(fig.get_axes(), fmt, limits=limits, log_scale=log_scale)
 
-    fig.tight_layout()
-
     fig.suptitle(y)
+    fig.tight_layout()
 
     return fig
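The one-line fix in the `@@ -204` hunk above is worth pausing on: the time column was being zipped with the main metric by mistake, so the "time" plot showed metric values. A toy illustration of the intended (value, support) pairing and the support-weighted aggregation (the `weighted_mean` here is a stand-in, not the project's implementation):

```python
# Toy illustration of the (value, support) pairing fixed above: each cell
# carries the metric together with its dataset support, so the catplot's
# estimator can compute a support-weighted mean.
import pandas as pd

df = pd.DataFrame({"f1": [0.9, 0.6], "time": [0.01, 0.02], "support": [100, 50]})
df["f1 "] = list(zip(df["f1"], df["support"]))
df["time "] = list(zip(df["time"], df["support"]))  # previously zipped the f1 column by mistake


def weighted_mean(pairs):
    values, supports = zip(*pairs)
    return sum(v * s for v, s in zip(values, supports)) / sum(supports)


print(weighted_mean(df["time "]))  # ~0.0133 s, instead of an f1-flavored number
```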
diff --git a/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluator.py b/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluator.py
index 266de795dd14914537cade66ee3f0cd560aaf039..9f11ae38cb1071e8868ee9c13d38f5a3ec795782 100644
--- a/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluator.py
+++ b/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluator.py
@@ -1,5 +1,4 @@
 from enum import Enum
-from itertools import chain
 from time import time
 from typing import Generic, Iterable, List
 
@@ -20,25 +19,24 @@ from research.robots_at_robots.evaluation.set import Set
 
 class ImageClassificationPipelineEvaluator(Generic[TargetT]):
     def __init__(
-        self, train_datasets: List[FileImageDataset], test_datasets: List[FileImageDataset],
+        self,
+        train_datasets: List[FileImageDataset],
+        validation_datasets: List[FileImageDataset],
+        test_datasets: List[FileImageDataset],
     ):
-        self.train_datasets = train_datasets
-        self.test_datasets = test_datasets
+        self.set2datasets = {Set.TRAIN: train_datasets, Set.VALIDATION: validation_datasets, Set.TEST: test_datasets}
 
     def evaluate_pipelines(self, pipelines: Iterable[ClassificationPipeline]) -> ClassificationPerformances:
         return ClassificationPerformances(flatten(self._evaluate_pipeline(pipeline) for pipeline in pipelines))
 
     def _evaluate_pipeline(self, pipeline: ClassificationPipeline) -> Iterable[ContextualizedClassificationPerformance]:
-        return chain(
-            self._evaluate_pipeline_on_set(pipeline, self.train_datasets, Set.TRAIN),
-            self._evaluate_pipeline_on_set(pipeline, self.test_datasets, Set.TEST),
-        )
+        for set_ in Set:
+            yield from self._evaluate_pipeline_on_set(pipeline, set_)
 
-    @staticmethod
     def _evaluate_pipeline_on_set(
-        pipeline: ClassificationPipeline, datasets: List[FileImageDataset], set_: Set
+        self, pipeline: ClassificationPipeline, set_: Set
     ) -> Iterable[ContextualizedClassificationPerformance]:
-        for dataset in datasets:
+        for dataset in self.set2datasets[set_]:
             t = time()
             proba, classes = pipeline.predict_proba_and_classes(file_images_to_images(dataset.examples))
             mean_time = (time() - t) / len(dataset)
diff --git a/robots-at-robots/research/robots_at_robots/evaluation/performance.py b/robots-at-robots/research/robots_at_robots/evaluation/performance.py
index 33c0bc765a301b1bbe3c956948fa3351052cce6b..52c014c9b6348a7ddf6a556843806c2f09b908cb 100644
--- a/robots-at-robots/research/robots_at_robots/evaluation/performance.py
+++ b/robots-at-robots/research/robots_at_robots/evaluation/performance.py
@@ -49,6 +49,10 @@ class ClassificationPerformances(Iterable[ContextualizedClassificationPerformanc
     def test(self) -> "ClassificationPerformances":
         return self.on_set(Set.TEST)
 
+    @property
+    def validation(self) -> "ClassificationPerformances":
+        return self.on_set(Set.VALIDATION)
+
     def on_set(self, set_: Set) -> "ClassificationPerformances":
         return ClassificationPerformances(SetClassificationPerformanceFilter(set_).filter(self.performances))
 
diff --git a/robots-at-robots/research/robots_at_robots/evaluation/set.py b/robots-at-robots/research/robots_at_robots/evaluation/set.py
index 6175a68587d575e3d18cacf456ab99da45925220..0911a53bc040bf721a0cb9afa64235d2d416a3c8 100644
--- a/robots-at-robots/research/robots_at_robots/evaluation/set.py
+++ b/robots-at-robots/research/robots_at_robots/evaluation/set.py
@@ -8,6 +8,9 @@ class Set(Enum):
     VALIDATION = auto()
     TEST = auto()
 
+    def __hash__(self):
+        return hash(self.name)
+
     def __repr__(self):
         return self.name.lower()
 
diff --git a/robots-at-robots/research/robots_at_robots/evaluation/trainer.py b/robots-at-robots/research/robots_at_robots/evaluation/trainer.py
index 6731cd00fd39b312d7e878eecc55fbb4d85adfb6..6f5940bb0ab56abc2ce15c8b81f932e30971d1f2 100644
--- a/robots-at-robots/research/robots_at_robots/evaluation/trainer.py
+++ b/robots-at-robots/research/robots_at_robots/evaluation/trainer.py
@@ -10,13 +10,14 @@ from research.common.datasets.union_dataset import UnionDataset
 
 
 class ImageClassificationPipelineTrainer(Generic[TargetT]):
-    def __init__(self, training_datasets: List[FileImageDataset]):
-        train_dataset = UnionDataset(training_datasets)
-        self.images = file_images_to_images(train_dataset.examples)
-        self.labels = train_dataset.targets
+    def __init__(self, training_datasets: List[FileImageDataset], validation_datasets: List[FileImageDataset]):
+        dataset = UnionDataset(training_datasets + validation_datasets)
+        self.validation_size = sum(len(d) for d in validation_datasets)
+        self.images = file_images_to_images(dataset.examples)
+        self.labels = dataset.targets
 
     def train_pipeline(self, pipeline: ClassificationPipeline):
-        pipeline.fit(self.images, self.labels)
+        pipeline.fit(self.images, self.labels, validation_size=self.validation_size)
 
     def train_pipelines(self, pipelines: List[ClassificationPipeline]):
         tqdm_pipelines = tqdm(pipelines, desc="Training Pipelines")
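The trainer change above is what makes the tail split in `KerasClassifier.fit` valid: train and validation datasets are concatenated in order, so the last `validation_size` examples are exactly the validation data. A toy check of that ordering invariant (plain lists standing in for `FileImageDataset`):

```python
# Toy check of the ordering invariant behind validation_size: the trainer
# builds UnionDataset(training_datasets + validation_datasets), so the
# validation examples always sit at the tail of the concatenated data.
train_sets = [["a1", "a2"], ["b1"]]
validation_sets = [["v1", "v2"]]

examples = [example for dataset in train_sets + validation_sets for example in dataset]
validation_size = sum(len(dataset) for dataset in validation_sets)

assert examples[len(examples) - validation_size:] == ["v1", "v2"]
```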