From aa47d12d8c04076b11c43ee6c0485a17a126f782 Mon Sep 17 00:00:00 2001
From: Mathieu Beligon <mathieu@feedly.com>
Date: Mon, 14 Dec 2020 21:27:55 +0100
Subject: [PATCH] [common] (pipeline) Add validation support

---
 .../classification/classification_pipeline.py |  4 ++-
 .../pipeline/classification/classifier_abc.py |  2 +-
 .../pipeline/classification/random_model.py   |  2 +-
 common/polystar/common/utils/markdown.py      |  2 +-
 .../armor_color/armor_color_benchmarker.py    |  6 +++-
 .../robots_at_robots/armor_color/benchmark.py |  7 ++--
 .../armor_digit/armor_digit_benchmarker.py    |  6 +++-
 .../robots_at_robots/armor_digit/benchmark.py | 35 +++++++++----------
 .../robots_at_robots/evaluation/benchmark.py  | 13 ++++---
 .../image_pipeline_evaluation_reporter.py     |  2 +-
 .../robots_at_robots/evaluation/trainer.py    | 11 +++---
 11 files changed, 52 insertions(+), 38 deletions(-)

diff --git a/common/polystar/common/pipeline/classification/classification_pipeline.py b/common/polystar/common/pipeline/classification/classification_pipeline.py
index 99c85f5..56086c9 100644
--- a/common/polystar/common/pipeline/classification/classification_pipeline.py
+++ b/common/polystar/common/pipeline/classification/classification_pipeline.py
@@ -22,7 +22,9 @@ class ClassificationPipeline(Pipeline, Generic[IT, EnumT], ABC):
     def classifier(self) -> ClassifierABC:
         return self.steps[-1][-1]
 
-    def fit(self, x: Sequence[IT], y: List[EnumT], **fit_params):
+    def fit(self, x: Sequence[IT], y: List[EnumT], validation_size: int = 0, **fit_params):
+        if isinstance(self.classifier, ClassifierABC):
+            fit_params[f"{self.classifier.__class__.__name__}__validation_size"] = validation_size
         y_indices = _labels_to_indices(y)
         return super().fit(x, y_indices, **fit_params)
 
diff --git a/common/polystar/common/pipeline/classification/classifier_abc.py b/common/polystar/common/pipeline/classification/classifier_abc.py
index 64c89b9..3016baf 100644
--- a/common/polystar/common/pipeline/classification/classifier_abc.py
+++ b/common/polystar/common/pipeline/classification/classifier_abc.py
@@ -10,7 +10,7 @@ from polystar.common.utils.named_mixin import NamedMixin
 class ClassifierABC(BaseEstimator, NamedMixin, Generic[IT], ABC):
     n_classes: int
 
-    def fit(self, examples: List[IT], label_indices: List[int]) -> "ClassifierABC":
+    def fit(self, examples: List[IT], label_indices: List[int], validation_size: int) -> "ClassifierABC":
         return self
 
     @abstractmethod
diff --git a/common/polystar/common/pipeline/classification/random_model.py b/common/polystar/common/pipeline/classification/random_model.py
index d6a13a9..9080f7a 100644
--- a/common/polystar/common/pipeline/classification/random_model.py
+++ b/common/polystar/common/pipeline/classification/random_model.py
@@ -11,7 +11,7 @@ class RandomClassifier(RuleBasedClassifierABC):
     def predict(self, examples: np.ndarray) -> List[int]:
         return choice(range(self.n_classes), size=len(examples), replace=True, p=self.weights_)
 
-    def fit(self, examples: List, label_indices: List[int]) -> "RandomClassifier":
+    def fit(self, examples: List, label_indices: List[int], validation_size: int) -> "RandomClassifier":
         indices2counts = Counter(label_indices)
         self.weights_ = [indices2counts[i] / len(label_indices) for i in range(self.n_classes)]
         return self
diff --git a/common/polystar/common/utils/markdown.py b/common/polystar/common/utils/markdown.py
index 3997375..1aa5e5f 100644
--- a/common/polystar/common/utils/markdown.py
+++ b/common/polystar/common/utils/markdown.py
@@ -33,7 +33,7 @@ class MarkdownFile:
         return self
 
     def image(self, relative_path: str, alt: str = "img") -> "MarkdownFile":
-        self.paragraph(f"![{alt}]({relative_path})")
+        self.paragraph(f"![{alt}]({str(relative_path).replace(' ', '%20')})")
         return self
 
     def figure(self, figure: Figure, name: str, alt: str = "img"):
diff --git a/robots-at-robots/research/robots_at_robots/armor_color/armor_color_benchmarker.py b/robots-at-robots/research/robots_at_robots/armor_color/armor_color_benchmarker.py
index a01bf0d..37a9e35 100644
--- a/robots-at-robots/research/robots_at_robots/armor_color/armor_color_benchmarker.py
+++ b/robots-at-robots/research/robots_at_robots/armor_color/armor_color_benchmarker.py
@@ -7,11 +7,15 @@ from research.robots_at_robots.evaluation.benchmark import make_armor_value_benc
 
 
 def make_armor_color_benchmarker(
-    train_roco_datasets: List[ROCODatasetBuilder], test_roco_datasets: List[ROCODatasetBuilder], experiment_name: str
+    train_roco_datasets: List[ROCODatasetBuilder],
+    validation_roco_datasets: List[ROCODatasetBuilder],
+    test_roco_datasets: List[ROCODatasetBuilder],
+    experiment_name: str,
 ):
     dataset_generator = make_armor_color_dataset_generator()
     return make_armor_value_benchmarker(
         train_roco_datasets=train_roco_datasets,
+        validation_roco_datasets=validation_roco_datasets,
         test_roco_datasets=test_roco_datasets,
         evaluation_project="armor-color",
         experiment_name=experiment_name,
diff --git a/robots-at-robots/research/robots_at_robots/armor_color/benchmark.py b/robots-at-robots/research/robots_at_robots/armor_color/benchmark.py
index 1ac6f2b..441fb0d 100644
--- a/robots-at-robots/research/robots_at_robots/armor_color/benchmark.py
+++ b/robots-at-robots/research/robots_at_robots/armor_color/benchmark.py
@@ -37,19 +37,20 @@ if __name__ == "__main__":
     logging.getLogger().setLevel("INFO")
 
     _benchmarker = make_armor_color_benchmarker(
-        [
+        train_roco_datasets=[
             ROCODatasetsZoo.TWITCH.T470150052,
             ROCODatasetsZoo.TWITCH.T470152289,
             ROCODatasetsZoo.TWITCH.T470149568,
             ROCODatasetsZoo.TWITCH.T470151286,
         ],
-        [
+        validation_roco_datasets=[],
+        test_roco_datasets=[
             ROCODatasetsZoo.TWITCH.T470152838,
             ROCODatasetsZoo.TWITCH.T470153081,
             ROCODatasetsZoo.TWITCH.T470158483,
             ROCODatasetsZoo.TWITCH.T470152730,
         ],
-        "test",
+        experiment_name="test",
     )
 
     red_blue_comparison_pipeline = ArmorColorPipeline.from_pipes(
diff --git a/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_benchmarker.py b/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_benchmarker.py
index f4792c4..d55d54a 100644
--- a/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_benchmarker.py
+++ b/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_benchmarker.py
@@ -7,11 +7,15 @@ from research.robots_at_robots.evaluation.benchmark import make_armor_value_benc
 
 
 def make_armor_digit_benchmarker(
-    train_roco_datasets: List[ROCODatasetBuilder], test_roco_datasets: List[ROCODatasetBuilder], experiment_name: str
+    train_roco_datasets: List[ROCODatasetBuilder],
+    validation_roco_datasets: List[ROCODatasetBuilder],
+    test_roco_datasets: List[ROCODatasetBuilder],
+    experiment_name: str,
 ):
     dataset_generator = make_armor_digit_dataset_generator()
     return make_armor_value_benchmarker(
         train_roco_datasets=train_roco_datasets,
+        validation_roco_datasets=validation_roco_datasets,
         test_roco_datasets=test_roco_datasets,
         evaluation_project="armor-digit",
         experiment_name=experiment_name,
diff --git a/robots-at-robots/research/robots_at_robots/armor_digit/benchmark.py b/robots-at-robots/research/robots_at_robots/armor_digit/benchmark.py
index 1b48d0e..1c7a80f 100644
--- a/robots-at-robots/research/robots_at_robots/armor_digit/benchmark.py
+++ b/robots-at-robots/research/robots_at_robots/armor_digit/benchmark.py
@@ -29,7 +29,8 @@ class ArmorDigitPipeline(ClassificationPipeline):
 
 
 class KerasClassifier(ClassifierABC):
-    def __init__(self, model: Model, optimizer, logs_dir: Path, with_data_augmentation: bool):
+    def __init__(self, model: Model, optimizer, logs_dir: Path, with_data_augmentation: bool, batch_size: int = 32):
+        self.batch_size = batch_size
         self.logs_dir = logs_dir
         self.with_data_augmentation = with_data_augmentation
         self.model = model
@@ -41,19 +42,17 @@ class KerasClassifier(ClassifierABC):
             return ImageDataGenerator()
         return ImageDataGenerator(rotation_range=45, zoom_range=[0.8, 1])  # brightness_range=[0.7, 1.4]
 
-    def fit(self, images: List[Image], labels: List[int]) -> "KerasClassifier":
-        n_val: int = 371  # FIXME
+    def fit(self, images: List[Image], labels: List[int], validation_size: int) -> "KerasClassifier":
         images = asarray(images)
         labels = to_categorical(asarray(labels), 5)  # FIXME
-        train_images, train_labels = images[:-n_val], labels[:-n_val]
-        val_images, val_labels = images[-n_val:], labels[-n_val:]
+        train_images, train_labels = images[:-validation_size], labels[:-validation_size]
+        val_images, val_labels = images[-validation_size:], labels[-validation_size:]
 
-        batch_size = 32  # FIXME
-        train_generator = self.train_data_gen.flow(train_images, train_labels, batch_size=batch_size, shuffle=True)
+        train_generator = self.train_data_gen.flow(train_images, train_labels, batch_size=self.batch_size, shuffle=True)
 
         self.model.fit(
             x=train_generator,
-            steps_per_epoch=len(train_images) / batch_size,
+            steps_per_epoch=len(train_images) / self.batch_size,
             validation_data=(val_images, val_labels),
             epochs=300,
             callbacks=[
@@ -102,7 +101,7 @@ def make_digits_cnn_pipeline(
 ) -> ArmorDigitPipeline:
     name = (
         f"cnn - ({input_size}) - lr {lr} - "
-        + " / ".join("_".join(map(str, sizes)) for sizes in conv_blocks)
+        + " ".join("_".join(map(str, sizes)) for sizes in conv_blocks)
         + (" - with_data_augm" * with_data_augmentation)
     )
     input_size = (input_size, input_size)
@@ -179,8 +178,8 @@ if __name__ == "__main__":
             ROCODatasetsZoo.TWITCH.T470150052,
             ROCODatasetsZoo.TWITCH.T470149568,
             ROCODatasetsZoo.TWITCH.T470151286,
-            ROCODatasetsZoo.TWITCH.T470152289,
         ],
+        validation_roco_datasets=[ROCODatasetsZoo.TWITCH.T470152289],
         test_roco_datasets=[
             ROCODatasetsZoo.TWITCH.T470152838,
             ROCODatasetsZoo.TWITCH.T470153081,
@@ -190,12 +189,12 @@ if __name__ == "__main__":
         experiment_name="test-benchmarker",
     )
 
-    random_pipeline = ArmorDigitPipeline.from_pipes([RandomClassifier()], name="random")
+    _report_dir = _benchmarker.reporter.report_dir
 
-    report_dir = _benchmarker.reporter.report_dir
-    cnn_pipelines = [
+    _random_pipeline = ArmorDigitPipeline.from_pipes([RandomClassifier()], name="random")
+    _cnn_pipelines = [
         make_digits_cnn_pipeline(
-            32, ((32, 32), (64, 64)), report_dir, with_data_augmentation=with_data_augmentation, lr=lr,
+            32, ((32, 32), (64, 64)), _report_dir, with_data_augmentation=with_data_augmentation, lr=lr,
         )
         for with_data_augmentation in [False]
         for lr in [2.5e-2, 1.6e-2, 1e-2, 6.3e-3, 4e-4]
@@ -209,12 +208,10 @@ if __name__ == "__main__":
     # ]
 
     vgg16_pipelines = [
-        make_vgg16_pipeline(report_dir, input_size=32, with_data_augmentation=False, lr=lr)
+        make_vgg16_pipeline(_report_dir, input_size=32, with_data_augmentation=False, lr=lr)
         for lr in (1e-5, 5e-4, 2e-4, 1e-4, 5e-3)
     ]
 
-    logging.info(f"Run `tensorboard --logdir={report_dir}` for realtime logs")
+    logging.info(f"Run `tensorboard --logdir={_report_dir}` for realtime logs")
 
-    _benchmarker.benchmark(
-        [random_pipeline,]
-    )
+    _benchmarker.benchmark([_random_pipeline] + _cnn_pipelines)
diff --git a/robots-at-robots/research/robots_at_robots/evaluation/benchmark.py b/robots-at-robots/research/robots_at_robots/evaluation/benchmark.py
index 045b13d..7cc7b2d 100644
--- a/robots-at-robots/research/robots_at_robots/evaluation/benchmark.py
+++ b/robots-at-robots/research/robots_at_robots/evaluation/benchmark.py
@@ -16,13 +16,16 @@ class Benchmarker:
     def __init__(
         self,
         train_datasets: List[FileImageDataset],
+        validation_datasets: List[FileImageDataset],
         test_datasets: List[FileImageDataset],
         evaluation_project: str,
         experiment_name: str,
         classes: List,
     ):
-        self.trainer = ImageClassificationPipelineTrainer(train_datasets)
-        self.evaluator = ImageClassificationPipelineEvaluator(train_datasets, test_datasets)
+        self.trainer = ImageClassificationPipelineTrainer(train_datasets, validation_datasets)
+        self.evaluator = ImageClassificationPipelineEvaluator(
+            train_datasets + validation_datasets, test_datasets
+        )  # FIXME
         self.reporter = ImagePipelineEvaluationReporter(
             evaluation_project, experiment_name, classes, other_metrics=[F1Metric()]
         )
@@ -34,6 +37,7 @@ class Benchmarker:
 
 def make_armor_value_benchmarker(
     train_roco_datasets: List[ROCODatasetBuilder],
+    validation_roco_datasets: List[ROCODatasetBuilder],
     test_roco_datasets: List[ROCODatasetBuilder],
     evaluation_project: str,
     experiment_name: str,
@@ -41,8 +45,9 @@ def make_armor_value_benchmarker(
     classes: List,
 ):
     return Benchmarker(
-        dataset_generator.from_roco_datasets(train_roco_datasets),
-        dataset_generator.from_roco_datasets(test_roco_datasets),
+        train_datasets=dataset_generator.from_roco_datasets(train_roco_datasets),
+        validation_datasets=dataset_generator.from_roco_datasets(validation_roco_datasets),
+        test_datasets=dataset_generator.from_roco_datasets(test_roco_datasets),
         evaluation_project=evaluation_project,
         experiment_name=experiment_name,
         classes=classes,
diff --git a/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluation_reporter.py b/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluation_reporter.py
index 72996a9..1dab417 100644
--- a/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluation_reporter.py
+++ b/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluation_reporter.py
@@ -204,7 +204,7 @@ class ImagePipelineEvaluationReporter(Generic[EnumT]):
         ).sort_values(["set", self.main_metric.name], ascending=[True, False])
 
         df[f"{self.main_metric.name} "] = list(zip(df[self.main_metric.name], df.support))
-        df["time "] = list(zip(df[self.main_metric.name], df.support))
+        df["time "] = list(zip(df.time, df.support))
 
         return (
             _cat_pipeline_results(df, f"{self.main_metric.name} ", "{:.1%}", limits=(0, 1)),
diff --git a/robots-at-robots/research/robots_at_robots/evaluation/trainer.py b/robots-at-robots/research/robots_at_robots/evaluation/trainer.py
index 6731cd0..6f5940b 100644
--- a/robots-at-robots/research/robots_at_robots/evaluation/trainer.py
+++ b/robots-at-robots/research/robots_at_robots/evaluation/trainer.py
@@ -10,13 +10,14 @@ from research.common.datasets.union_dataset import UnionDataset
 
 
 class ImageClassificationPipelineTrainer(Generic[TargetT]):
-    def __init__(self, training_datasets: List[FileImageDataset]):
-        train_dataset = UnionDataset(training_datasets)
-        self.images = file_images_to_images(train_dataset.examples)
-        self.labels = train_dataset.targets
+    def __init__(self, training_datasets: List[FileImageDataset], validation_datasets: List[FileImageDataset]):
+        dataset = UnionDataset(training_datasets + validation_datasets)
+        self.validation_size = sum(len(d) for d in validation_datasets)
+        self.images = file_images_to_images(dataset.examples)
+        self.labels = dataset.targets
 
     def train_pipeline(self, pipeline: ClassificationPipeline):
-        pipeline.fit(self.images, self.labels)
+        pipeline.fit(self.images, self.labels, validation_size=self.validation_size)
 
     def train_pipelines(self, pipelines: List[ClassificationPipeline]):
         tqdm_pipelines = tqdm(pipelines, desc="Training Pipelines")
-- 
GitLab