From 5a80eb82f7b0b16aacf01a386c8e3d6ea0b7813f Mon Sep 17 00:00:00 2001
From: Mathieu Beligon <mathieu@feedly.com>
Date: Mon, 14 Sep 2020 16:47:13 +0200
Subject: [PATCH] [robots@robots] (datasets) adapt to the new format

---
 .../armor_color/armor_color_dataset.py        | 37 +++++++++
 .../armor_color_pipeline_reporter_factory.py  |  9 ++-
 .../armor_color/baseline_experiments.py       | 15 ++--
 .../armor_digit/armor_digit_dataset.py        | 44 +++++++++++
 .../armor_digit_pipeline_reporter_factory.py  | 14 ++--
 .../armor_digit/baseline_experiments.py       |  9 ++-
 .../dataset/armor_dataset_factory.py          | 53 +++++++------
 .../dataset/armor_value_dataset.py            | 77 +++++++++++++++++++
 8 files changed, 216 insertions(+), 42 deletions(-)
 create mode 100644 robots-at-robots/research/robots_at_robots/armor_color/armor_color_dataset.py
 create mode 100644 robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_dataset.py
 create mode 100644 robots-at-robots/research/robots_at_robots/dataset/armor_value_dataset.py

diff --git a/robots-at-robots/research/robots_at_robots/armor_color/armor_color_dataset.py b/robots-at-robots/research/robots_at_robots/armor_color/armor_color_dataset.py
new file mode 100644
index 0000000..1dc6b23
--- /dev/null
+++ b/robots-at-robots/research/robots_at_robots/armor_color/armor_color_dataset.py
@@ -0,0 +1,37 @@
+from itertools import islice
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+from polystar.common.models.object import Armor
+from research.common.datasets.dataset import Dataset
+from research.common.datasets.image_dataset import open_file_dataset
+from research.common.datasets.roco.zoo.roco_datasets_zoo import ROCODatasetsZoo
+from research.robots_at_robots.dataset.armor_value_dataset import (
+    ArmorValueDatasetGenerator, ArmorValueDirectoryDataset)
+
+
+class ArmorColorDirectoryDataset(ArmorValueDirectoryDataset[str]):  # directory dataset whose targets are str labels
+    @staticmethod
+    def _value_from_str(label: str) -> str:  # the label string is used as the target unchanged
+        return label
+
+
+class ArmorColorDatasetGenerator(ArmorValueDatasetGenerator[str]):  # armor-value generator for the "colors" task
+    def __init__(self) -> None:
+        super().__init__("colors")  # task name handed to the base generator
+
+    def _value_from_armor(self, armor: Armor) -> str:
+        return armor.color.name.lower()  # lower-cased name of the armor's color
+
+    def from_directory_and_name(self, directory: Path, name: str) -> Dataset[Path, str]:
+        return ArmorColorDirectoryDataset(directory, name)  # read the directory back as a color dataset
+
+
+if __name__ == "__main__":  # manual check: display a few samples of the color dataset
+    _dataset = open_file_dataset(ArmorColorDatasetGenerator().from_roco_dataset(ROCODatasetsZoo.TWITCH.T470150052))
+
+    for _image, _value, _name in islice(_dataset, 40, 50):  # items at indices 40..49 only
+        print(_value)
+        plt.imshow(_image)
+        plt.show()
+        plt.clf()  # clear the figure before the next sample
diff --git a/robots-at-robots/research/robots_at_robots/armor_color/armor_color_pipeline_reporter_factory.py b/robots-at-robots/research/robots_at_robots/armor_color/armor_color_pipeline_reporter_factory.py
index 86e4171..4b790de 100644
--- a/robots-at-robots/research/robots_at_robots/armor_color/armor_color_pipeline_reporter_factory.py
+++ b/robots-at-robots/research/robots_at_robots/armor_color/armor_color_pipeline_reporter_factory.py
@@ -1,9 +1,13 @@
 from typing import List
 
-from research.common.dataset.directory_roco_dataset import DirectoryROCODataset
-from research.common.image_pipeline_evaluation.image_pipeline_evaluation_reporter import ImagePipelineEvaluationReporter
-from research.common.image_pipeline_evaluation.image_pipeline_evaluator import ImagePipelineEvaluator
-from research.robots_at_robots.dataset.armor_color_dataset_factory import ArmorColorDatasetGenerator
+from research.common.datasets.roco.directory_roco_dataset import \
+    DirectoryROCODataset
+from research.common.image_pipeline_evaluation.image_pipeline_evaluation_reporter import \
+    ImagePipelineEvaluationReporter
+from research.common.image_pipeline_evaluation.image_pipeline_evaluator import \
+    ImagePipelineEvaluator
+from research.robots_at_robots.armor_color.armor_color_dataset import \
+    ArmorColorDatasetGenerator
 
 
 class ArmorColorPipelineReporterFactory:
diff --git a/robots-at-robots/research/robots_at_robots/armor_color/baseline_experiments.py b/robots-at-robots/research/robots_at_robots/armor_color/baseline_experiments.py
index 9ef9a93..5ee849c 100644
--- a/robots-at-robots/research/robots_at_robots/armor_color/baseline_experiments.py
+++ b/robots-at-robots/research/robots_at_robots/armor_color/baseline_experiments.py
@@ -1,10 +1,13 @@
 import logging
 
-from polystar.common.image_pipeline.classifier_image_pipeline import ClassifierImagePipeline
-from polystar.common.image_pipeline.image_featurizer.mean_rgb_channels_featurizer import MeanChannelsFeaturizer
+from polystar.common.image_pipeline.classifier_image_pipeline import \
+    ClassifierImagePipeline
+from polystar.common.image_pipeline.image_featurizer.mean_rgb_channels_featurizer import \
+    MeanChannelsFeaturizer
 from polystar.common.image_pipeline.models.random_model import RandomModel
-from polystar.common.image_pipeline.models.red_blue_channels_comparison_model import RedBlueComparisonModel
-from research.common.dataset.twitch.twitch_roco_datasets import TwitchROCODataset
+from polystar.common.image_pipeline.models.red_blue_channels_comparison_model import \
+    RedBlueComparisonModel
+from research.common.datasets.roco.zoo.roco_datasets_zoo import ROCODatasetsZoo
 from research.robots_at_robots.armor_color.armor_color_pipeline_reporter_factory import \
     ArmorColorPipelineReporterFactory
 
@@ -12,8 +15,8 @@ if __name__ == "__main__":
     logging.getLogger().setLevel("INFO")
 
     reporter = ArmorColorPipelineReporterFactory.from_roco_datasets(
-        train_roco_datasets=[TwitchROCODataset.TWITCH_470151286, TwitchROCODataset.TWITCH_470150052],
-        test_roco_datasets=[TwitchROCODataset.TWITCH_470152289],
+        train_roco_datasets=[ROCODatasetsZoo.TWITCH.T470151286, ROCODatasetsZoo.TWITCH.T470150052],
+        test_roco_datasets=[ROCODatasetsZoo.TWITCH.T470152289],
     )
 
     red_blue_comparison_pipeline = ClassifierImagePipeline(
diff --git a/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_dataset.py b/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_dataset.py
new file mode 100644
index 0000000..da331fd
--- /dev/null
+++ b/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_dataset.py
@@ -0,0 +1,44 @@
+from itertools import islice
+from pathlib import Path
+from typing import Iterable
+
+import matplotlib.pyplot as plt
+from polystar.common.filters.keep_filter import KeepFilter
+from polystar.common.models.object import Armor
+from research.common.datasets.dataset import Dataset
+from research.common.datasets.filtered_dataset import FilteredTargetsDataset
+from research.common.datasets.image_dataset import open_file_dataset
+from research.common.datasets.roco.zoo.roco_datasets_zoo import ROCODatasetsZoo
+from research.robots_at_robots.dataset.armor_value_dataset import (
+    ArmorValueDatasetGenerator, ArmorValueDirectoryDataset)
+
+
+class ArmorDigitDirectoryDataset(ArmorValueDirectoryDataset[int]):  # directory dataset whose targets are ints
+    @staticmethod
+    def _value_from_str(label: str) -> int:  # int() raises ValueError when the label is not an integer literal
+        return int(label)
+
+
+class ArmorDigitDatasetGenerator(ArmorValueDatasetGenerator[int]):  # int targets, matching the methods below
+    def __init__(self, acceptable_digits: Iterable[int]):
+        super().__init__("digits")  # task name handed to the base generator
+        self.acceptable_digits = tuple(acceptable_digits)  # materialized once: reused for every directory dataset
+
+    def _value_from_armor(self, armor: Armor) -> int:
+        return armor.number
+
+    def from_directory_and_name(self, directory: Path, name: str) -> Dataset[Path, int]:
+        full_dataset = ArmorDigitDirectoryDataset(directory, name)
+        return FilteredTargetsDataset(full_dataset, KeepFilter(self.acceptable_digits))  # presumably keeps only those
+
+
+if __name__ == "__main__":  # manual check: display a few samples of the digit dataset
+    _dataset = open_file_dataset(
+        ArmorDigitDatasetGenerator((1, 2, 3, 4, 5, 7)).from_roco_dataset(ROCODatasetsZoo.TWITCH.T470150052)
+    )
+
+    for _image, _value, _name in islice(_dataset, 40, 50):  # items at indices 40..49 only
+        print(_value)
+        plt.imshow(_image)
+        plt.show()
+        plt.clf()  # clear the figure before the next sample
diff --git a/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_pipeline_reporter_factory.py b/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_pipeline_reporter_factory.py
index 7d54e8d..9aa086e 100644
--- a/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_pipeline_reporter_factory.py
+++ b/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_pipeline_reporter_factory.py
@@ -1,9 +1,13 @@
 from typing import Iterable, List
 
-from research.common.dataset.directory_roco_dataset import DirectoryROCODataset
-from research.common.image_pipeline_evaluation.image_pipeline_evaluation_reporter import ImagePipelineEvaluationReporter
-from research.common.image_pipeline_evaluation.image_pipeline_evaluator import ImagePipelineEvaluator
-from research.robots_at_robots.dataset.armor_digit_dataset_factory import ArmorDigitDatasetGenerator
+from research.common.datasets.roco.directory_roco_dataset import \
+    DirectoryROCODataset
+from research.common.image_pipeline_evaluation.image_pipeline_evaluation_reporter import \
+    ImagePipelineEvaluationReporter
+from research.common.image_pipeline_evaluation.image_pipeline_evaluator import \
+    ImagePipelineEvaluator
+from research.robots_at_robots.armor_digit.armor_digit_dataset import \
+    ArmorDigitDatasetGenerator
 
 
 class ArmorDigitPipelineReporterFactory:
@@ -17,7 +21,7 @@ class ArmorDigitPipelineReporterFactory:
             evaluator=ImagePipelineEvaluator(
                 train_roco_datasets=train_roco_datasets,
                 test_roco_datasets=test_roco_datasets,
-                image_dataset_generator=ArmorDigitDatasetGenerator(set(acceptable_digits)),
+                image_dataset_generator=ArmorDigitDatasetGenerator(acceptable_digits),
             ),
             evaluation_project="armor-digit",
         )
diff --git a/robots-at-robots/research/robots_at_robots/armor_digit/baseline_experiments.py b/robots-at-robots/research/robots_at_robots/armor_digit/baseline_experiments.py
index 8dd1dcf..132cb96 100644
--- a/robots-at-robots/research/robots_at_robots/armor_digit/baseline_experiments.py
+++ b/robots-at-robots/research/robots_at_robots/armor_digit/baseline_experiments.py
@@ -1,8 +1,9 @@
 import logging
 
-from polystar.common.image_pipeline.classifier_image_pipeline import ClassifierImagePipeline
+from polystar.common.image_pipeline.classifier_image_pipeline import \
+    ClassifierImagePipeline
 from polystar.common.image_pipeline.models.random_model import RandomModel
-from research.common.dataset.twitch.twitch_roco_datasets import TwitchROCODataset
+from research.common.datasets.roco.zoo.roco_datasets_zoo import ROCODatasetsZoo
 from research.robots_at_robots.armor_digit.armor_digit_pipeline_reporter_factory import \
     ArmorDigitPipelineReporterFactory
 
@@ -10,8 +11,8 @@ if __name__ == "__main__":
     logging.getLogger().setLevel("INFO")
 
     reporter = ArmorDigitPipelineReporterFactory.from_roco_datasets(
-        train_roco_datasets=[TwitchROCODataset.TWITCH_470151286, TwitchROCODataset.TWITCH_470150052],
-        test_roco_datasets=[TwitchROCODataset.TWITCH_470152289],
+        train_roco_datasets=[ROCODatasetsZoo.TWITCH.T470151286, ROCODatasetsZoo.TWITCH.T470150052],
+        test_roco_datasets=[ROCODatasetsZoo.TWITCH.T470152289],
     )
 
     random_pipeline = ClassifierImagePipeline(model=RandomModel(), custom_name="random")
diff --git a/robots-at-robots/research/robots_at_robots/dataset/armor_dataset_factory.py b/robots-at-robots/research/robots_at_robots/dataset/armor_dataset_factory.py
index a5138d4..10f1a6b 100644
--- a/robots-at-robots/research/robots_at_robots/dataset/armor_dataset_factory.py
+++ b/robots-at-robots/research/robots_at_robots/dataset/armor_dataset_factory.py
@@ -1,38 +1,43 @@
-from pathlib import Path
-from typing import Iterable, List, Tuple
+from itertools import islice
+from typing import Iterator, List, Tuple
 
 import matplotlib.pyplot as plt
-
 from polystar.common.models.image import Image
-from polystar.common.models.image_annotation import ImageAnnotation
 from polystar.common.models.object import Armor, ObjectType
-from polystar.common.target_pipeline.objects_validators.type_object_validator import TypeObjectValidator
-from research.common.dataset.dji.dji_roco_datasets import DJIROCODataset
-from research.common.dataset.roco_dataset import ROCODataset
+from polystar.common.target_pipeline.objects_validators.type_object_validator import \
+    TypeObjectValidator
+from research.common.datasets.dataset import Dataset, GeneratorDataset
+from research.common.datasets.roco.roco_annotation import ROCOAnnotation
+from research.common.datasets.roco.roco_dataset import (ROCODataset,
+                                                        ROCOFileDataset)
+from research.common.datasets.roco.zoo.roco_datasets_zoo import ROCODatasetsZoo
+
+ArmorDataset = Dataset[Image, Armor]  # alias for the dataset type returned by ArmorDatasetFactory.make
 
 
 class ArmorDatasetFactory:
-    @staticmethod
-    def from_image_annotation(image_annotation: ImageAnnotation) -> Iterable[Tuple[Image, Armor, int, Path]]:
-        img = image_annotation.image
-        armors: List[Armor] = TypeObjectValidator(ObjectType.Armor).filter(image_annotation.objects, img)
-        for i, obj in enumerate(armors):
-            croped_img = img[obj.box.y1 : obj.box.y2, obj.box.x1 : obj.box.x2]
-            yield croped_img, obj, i, image_annotation.image_path
+    def __init__(self, dataset: ROCOFileDataset) -> None:  # stores the result of dataset.open(), not the argument
+        self.dataset: ROCODataset = dataset.open()
+
+    def make(self) -> ArmorDataset:  # dataset named "<source name>_armors", fed by _make_generator
+        return GeneratorDataset(f"{self.dataset.name}_armors", self._make_generator)
+
+    def _make_generator(self) -> Iterator[Tuple[Image, Armor, str]]:  # chains the armors of every source item
+        for image, annotation, name in self.dataset:
+            yield from self._generate_from_single(image, annotation, name)
 
     @staticmethod
-    def from_dataset(dataset: ROCODataset) -> Iterable[Tuple[Image, Armor, int, Path]]:
-        for image_annotation in dataset.image_annotations:
-            for rv in ArmorDatasetFactory.from_image_annotation(image_annotation):
-                yield rv
+    def _generate_from_single(image: Image, annotation: ROCOAnnotation, name) -> Iterator[Tuple[Image, Armor, str]]:
+        armor_validator = TypeObjectValidator(ObjectType.Armor)  # presumably keeps only the Armor objects
+        armors: List[Armor] = armor_validator.filter(annotation.objects, image)
+        for index, armor in enumerate(armors):
+            box = armor.box  # the crop below takes rows y1..y2 and columns x1..x2 of the full image
+            yield image[box.y1 : box.y2, box.x1 : box.x2], armor, f"{name}-{index}"
 
 
 if __name__ == "__main__":
-    for i, (armor_img, armor, k, p) in enumerate(ArmorDatasetFactory.from_dataset(DJIROCODataset.CentralChina)):
-        print(armor, armor.color, armor.number, "-", k, "in", p)
-        plt.imshow(armor_img)
+    for _armor_img, _armor, _name in islice(ArmorDatasetFactory(ROCODatasetsZoo.DJI.CentralChina).make(), 20, 30):
+        print(_name, repr(_armor))  # manual check on the items at indices 20..29
+        plt.imshow(_armor_img)
         plt.show()
         plt.clf()
-
-        if i == 50:
-            break
diff --git a/robots-at-robots/research/robots_at_robots/dataset/armor_value_dataset.py b/robots-at-robots/research/robots_at_robots/dataset/armor_value_dataset.py
new file mode 100644
index 0000000..2e6609b
--- /dev/null
+++ b/robots-at-robots/research/robots_at_robots/dataset/armor_value_dataset.py
@@ -0,0 +1,77 @@
+import json
+from abc import ABC, abstractmethod
+from pathlib import Path
+from typing import ClassVar, Generic, List, TypeVar
+
+from polystar.common.models.image import Image, save_image
+from polystar.common.models.object import Armor
+from polystar.common.utils.time import create_time_id
+from polystar.common.utils.tqdm import smart_tqdm
+from research.common.datasets.dataset import Dataset
+from research.common.datasets.image_dataset import ImageDirectoryDataset
+from research.common.datasets.roco.directory_roco_dataset import \
+    DirectoryROCODataset
+from research.common.datasets.union_dataset import UnionDataset
+from research.robots_at_robots.dataset.armor_dataset_factory import \
+    ArmorDatasetFactory
+
+ValueT = TypeVar("ValueT")  # type of the target values (str and int in the concrete datasets)
+
+
+class ArmorValueDirectoryDataset(Generic[ValueT], ImageDirectoryDataset[ValueT], ABC):
+    def target_from_image_file(self, image_file: Path) -> ValueT:
+        return self._value_from_str(image_file.stem.split("-")[-1])  # label = stem text after the last "-"
+
+    @staticmethod  # needs no instance state; declared the way the concrete datasets implement it
+    @abstractmethod
+    def _value_from_str(label: str) -> ValueT: ...  # hook: parse the file-name label into a target value
+
+
+class ArmorValueDatasetGenerator(Generic[ValueT], ABC):  # builds and reuses <main_dir>/<task_name> image datasets
+    VERSION: ClassVar[str] = "1.0"  # stored in the lock file; a different stored version triggers a regeneration
+
+    def __init__(self, task_name: str):
+        self.task_name = task_name  # used as sub-directory name and inside dataset names
+
+    def from_roco_datasets(self, roco_datasets: List[DirectoryROCODataset]) -> UnionDataset[Path, ValueT]:
+        return UnionDataset(map(self.from_roco_dataset, roco_datasets))  # map is lazy: runs when consumed
+
+    def from_roco_dataset(self, roco_dataset: DirectoryROCODataset) -> Dataset[Path, ValueT]:
+        self._generate_if_absent(roco_dataset)
+        task_dir = self._task_dir(roco_dataset)
+        dataset_name = f"{roco_dataset.name}_armor_{self.task_name}"
+        return self.from_directory_and_name(task_dir, dataset_name)
+
+    @abstractmethod
+    def from_directory_and_name(self, directory: Path, name: str) -> Dataset[Path, ValueT]:
+        """Return the dataset called ``name`` built from the images stored in ``directory``."""
+
+    def _generate_if_absent(self, roco_dataset: DirectoryROCODataset):
+        if self._exists_and_is_valid(roco_dataset):
+            return
+        self._generate(roco_dataset)  # NOTE(review): images left by an earlier version are not removed first
+
+    def _task_dir(self, roco_dataset: DirectoryROCODataset) -> Path:
+        return roco_dataset.main_dir / self.task_name
+
+    def _generate(self, roco_dataset: DirectoryROCODataset):
+        task_dir = self._task_dir(roco_dataset)
+        task_dir.mkdir(parents=True, exist_ok=True)  # the lock file below needs the directory even with no image
+        description = f"Generating dataset {roco_dataset.name}_{self.task_name} "
+        for image, target, name in smart_tqdm(self._make_dataset(roco_dataset), desc=description, unit="frame"):
+            save_image(image, task_dir / f"{name}-{target}.jpg")  # target goes last: it is parsed back from there
+        self._lock_file(roco_dataset).write_text(json.dumps({"version": self.VERSION, "date": create_time_id()}))
+
+    def _exists_and_is_valid(self, roco_dataset: DirectoryROCODataset) -> bool:
+        lock = self._lock_file(roco_dataset)
+        return lock.exists() and json.loads(lock.read_text())["version"] == self.VERSION
+
+    def _make_dataset(self, roco_dataset: DirectoryROCODataset) -> Dataset[Image, ValueT]:
+        return ArmorDatasetFactory(roco_dataset).make().transform_targets(self._value_from_armor)
+
+    def _lock_file(self, roco_dataset: DirectoryROCODataset) -> Path:
+        return self._task_dir(roco_dataset) / ".lock"
+
+    @abstractmethod
+    def _value_from_armor(self, armor: Armor) -> ValueT:
+        """Return the target value to store for ``armor``."""
-- 
GitLab