From 26ee387aae05a735633e98a1571e9b5c78e29fd6 Mon Sep 17 00:00:00 2001
From: Mathieu Beligon <mathieu@feedly.com>
Date: Fri, 25 Sep 2020 18:48:53 +0200
Subject: [PATCH] [common] (datasets) refactor zoo: drop the enum usage for a
 custom metaclass

---
 .../common/dataset/directory_roco_dataset.py  | 26 -------
 .../research/common/dataset/dji/__init__.py   |  0
 .../common/dataset/dji/dji_roco_datasets.py   | 14 ----
 .../dataset/dji/dji_roco_zoomed_datasets.py   | 15 -----
 .../research/common/dataset/roco_dataset.py   | 27 --------
 common/research/common/dataset/split.py       | 14 ----
 .../common/dataset/tensorflow_record.py       | 10 ++-
 .../dataset/twitch/twitch_roco_datasets.py    | 35 ----------
 .../research/common/dataset/union_dataset.py  | 11 ---
 .../common/datasets_v3/dataset_builder.py     |  3 +
 .../roco/roco_dataset_descriptor.py           |  5 +-
 .../common/datasets_v3/roco/roco_datasets.py  | 67 +++++++++----------
 .../common/datasets_v3/roco/zoo/dji.py        | 17 ++---
 .../common/datasets_v3/roco/zoo/dji_zoomed.py | 19 +++---
 .../common/datasets_v3/roco/zoo/twitch.py     | 36 ++++------
 .../scripts/create_tensorflow_records.py      |  2 +-
 .../common/scripts/improve_roco_by_zooming.py |  4 +-
 .../common/scripts/visualize_dataset.py       |  2 +-
 .../armor_color/armor_color_dataset.py        |  2 +-
 .../armor_color/baseline_experiments.py       |  4 +-
 .../armor_digit/armor_digit_dataset.py        |  2 +-
 .../armor_digit/baseline_experiments.py       |  4 +-
 .../dataset/armor_dataset_factory.py          |  2 +-
 .../robots_at_robots/demos/demo_infer.py      |  2 +-
 .../robots_at_robots/demos/demo_pipeline.py   |  4 +-
 25 files changed, 84 insertions(+), 243 deletions(-)
 delete mode 100644 common/research/common/dataset/directory_roco_dataset.py
 delete mode 100644 common/research/common/dataset/dji/__init__.py
 delete mode 100644 common/research/common/dataset/dji/dji_roco_datasets.py
 delete mode 100644 common/research/common/dataset/dji/dji_roco_zoomed_datasets.py
 delete mode 100644 common/research/common/dataset/roco_dataset.py
 delete mode 100644 common/research/common/dataset/split.py
 delete mode 100644 common/research/common/dataset/twitch/twitch_roco_datasets.py
 delete mode 100644 common/research/common/dataset/union_dataset.py

diff --git a/common/research/common/dataset/directory_roco_dataset.py b/common/research/common/dataset/directory_roco_dataset.py
deleted file mode 100644
index adf694d..0000000
--- a/common/research/common/dataset/directory_roco_dataset.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from pathlib import Path
-from typing import Iterable
-
-from research.common.dataset.roco_dataset import ROCODataset
-
-
-class DirectoryROCODataset(ROCODataset):
-    def __init__(self, dataset_path: Path, dataset_name: str):
-        self.dataset_name = dataset_name
-        self.dataset_path = dataset_path
-
-    @property
-    def images_dir_path(self) -> Path:
-        return self.dataset_path / "image"
-
-    @property
-    def annotations_dir_path(self) -> Path:
-        return self.dataset_path / "image_annotation"
-
-    @property
-    def image_paths(self) -> Iterable[Path]:
-        return self.images_dir_path.glob("*.jpg")
-
-    @property
-    def annotation_paths(self) -> Iterable[Path]:
-        return self.annotations_dir_path.glob("*.xml")
diff --git a/common/research/common/dataset/dji/__init__.py b/common/research/common/dataset/dji/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/common/research/common/dataset/dji/dji_roco_datasets.py b/common/research/common/dataset/dji/dji_roco_datasets.py
deleted file mode 100644
index b85ef3f..0000000
--- a/common/research/common/dataset/dji/dji_roco_datasets.py
+++ /dev/null
@@ -1,14 +0,0 @@
-from enum import Enum
-
-from research.common.constants import DJI_ROCO_DSET_DIR
-from research.common.dataset.directory_roco_dataset import DirectoryROCODataset
-
-
-class DJIROCODataset(DirectoryROCODataset, Enum):
-    def __init__(self, competition_name: str):
-        super().__init__(DJI_ROCO_DSET_DIR / competition_name, self.name)
-
-    CentralChina = "robomaster_Central China Regional Competition"
-    NorthChina = "robomaster_North China Regional Competition"
-    SouthChina = "robomaster_South China Regional Competition"
-    Final = "robomaster_Final Tournament"
diff --git a/common/research/common/dataset/dji/dji_roco_zoomed_datasets.py b/common/research/common/dataset/dji/dji_roco_zoomed_datasets.py
deleted file mode 100644
index 550eb60..0000000
--- a/common/research/common/dataset/dji/dji_roco_zoomed_datasets.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from enum import Enum, auto
-
-from polystar.common.utils.str_utils import camel2snake
-from research.common.constants import DJI_ROCO_ZOOMED_DSET_DIR
-from research.common.dataset.directory_roco_dataset import DirectoryROCODataset
-
-
-class DJIROCOZoomedDataset(DirectoryROCODataset, Enum):
-    def __init__(self, _):
-        super().__init__(DJI_ROCO_ZOOMED_DSET_DIR / camel2snake(self.name), f"{self.name}ZoomedV2")
-
-    CentralChina = auto()
-    NorthChina = auto()
-    SouthChina = auto()
-    Final = auto()
diff --git a/common/research/common/dataset/roco_dataset.py b/common/research/common/dataset/roco_dataset.py
deleted file mode 100644
index 7c8b9e6..0000000
--- a/common/research/common/dataset/roco_dataset.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Iterable
-
-from more_itertools import ilen
-from polystar.common.models.image import Image, load_image
-from polystar.common.models.image_annotation import ImageAnnotation
-
-
-@dataclass
-class ROCODataset:
-    image_paths: Iterable[Path]
-    annotation_paths: Iterable[Path]
-    dataset_name: str
-
-    @property
-    def images(self) -> Iterable[Image]:
-        for image_path in self.image_paths:
-            yield load_image(image_path)
-
-    @property
-    def image_annotations(self) -> Iterable[ImageAnnotation]:
-        for annotation_path in self.annotation_paths:
-            yield ImageAnnotation.from_xml_file(annotation_path)
-
-    def __len__(self) -> int:
-        return ilen(self.image_annotations)
diff --git a/common/research/common/dataset/split.py b/common/research/common/dataset/split.py
deleted file mode 100644
index eeecc6f..0000000
--- a/common/research/common/dataset/split.py
+++ /dev/null
@@ -1,14 +0,0 @@
-from enum import Enum
-from pathlib import Path
-
-from research.common.dataset.directory_roco_dataset import DirectoryROCODataset
-
-
-class Split(Enum):
-    Val = "val"
-    Train = "train"
-    Test = "test"
-    TrainVal = "trainval"
-
-    def get_split_file(self, dataset: DirectoryROCODataset) -> Path:
-        return (dataset.dataset_path / self.value).with_suffix(".txt")
diff --git a/common/research/common/dataset/tensorflow_record.py b/common/research/common/dataset/tensorflow_record.py
index e7ab094..e4e8154 100644
--- a/common/research/common/dataset/tensorflow_record.py
+++ b/common/research/common/dataset/tensorflow_record.py
@@ -10,19 +10,17 @@ from polystar.common.models.label_map import label_map
 from polystar.common.utils.tqdm import smart_tqdm
 from research.common.constants import TENSORFLOW_RECORDS_DIR
 from research.common.datasets_v3.roco.roco_annotation import ROCOAnnotation
-from research.common.datasets_v3.roco.roco_datasets import ROCODatasets
+from research.common.datasets_v3.roco.roco_dataset_builder import ROCODatasetBuilder
 
 
 class TensorflowRecordFactory:
     @staticmethod
-    def from_datasets(datasets: List[ROCODatasets], prefix: str = ""):
+    def from_datasets(datasets: List[ROCODatasetBuilder], prefix: str = ""):
         record_name = prefix + "_".join(d.name for d in datasets)
         writer = python_io.TFRecordWriter(str(TENSORFLOW_RECORDS_DIR / f"{record_name}.record"))
         c = 0
         for dataset in smart_tqdm(datasets, desc=record_name, unit="dataset"):
-            for image_path, annotation, _ in smart_tqdm(
-                dataset.lazy_files(), desc=dataset.name, unit="img", leave=False
-            ):
+            for image_path, annotation, _ in smart_tqdm(dataset, desc=dataset.name, unit="img", leave=False):
                 writer.write(_example_from_image_annotation(image_path, annotation).SerializeToString())
                 c += 1
         writer.close()
@@ -32,7 +30,7 @@ class TensorflowRecordFactory:
         )
 
     @staticmethod
-    def from_dataset(dataset: ROCODatasets, prefix: str = ""):
+    def from_dataset(dataset: ROCODatasetBuilder, prefix: str = ""):
         TensorflowRecordFactory.from_datasets([dataset], prefix)
 
 
diff --git a/common/research/common/dataset/twitch/twitch_roco_datasets.py b/common/research/common/dataset/twitch/twitch_roco_datasets.py
deleted file mode 100644
index f6c1ac6..0000000
--- a/common/research/common/dataset/twitch/twitch_roco_datasets.py
+++ /dev/null
@@ -1,35 +0,0 @@
-"""
->>> TwitchROCODataset.TWITCH_470149568.dataset_name
-'T470149568'
-
->>> from research import DSET_DIR
->>> TwitchROCODataset.TWITCH_470149568.dataset_path.relative_to(DSET_DIR)
-PosixPath('twitch/v1/470149568')
-
->>> TwitchROCODataset.TWITCH_470149568.video_url
-'https://www.twitch.tv/videos/470149568'
-"""
-
-from enum import Enum, auto
-
-from research.common.constants import TWITCH_DSET_DIR
-from research.common.dataset.directory_roco_dataset import DirectoryROCODataset
-
-
-class TwitchROCODataset(DirectoryROCODataset, Enum):
-    def __init__(self, _):
-        self.twitch_id = self.name[len("TWITCH_") :]
-        super().__init__(TWITCH_DSET_DIR / "v1" / self.twitch_id, f"T{self.twitch_id}")
-
-    @property
-    def video_url(self) -> str:
-        return f"https://www.twitch.tv/videos/{self.twitch_id}"
-
-    TWITCH_470149568 = auto()
-    TWITCH_470150052 = auto()
-    TWITCH_470151286 = auto()
-    TWITCH_470152289 = auto()
-    TWITCH_470152730 = auto()
-    TWITCH_470152838 = auto()
-    TWITCH_470153081 = auto()
-    TWITCH_470158483 = auto()
diff --git a/common/research/common/dataset/union_dataset.py b/common/research/common/dataset/union_dataset.py
deleted file mode 100644
index 80560c5..0000000
--- a/common/research/common/dataset/union_dataset.py
+++ /dev/null
@@ -1,11 +0,0 @@
-from research.common.dataset.roco_dataset import ROCODataset
-
-
-class UnionDataset(ROCODataset):
-    def __init__(self, *datasets: ROCODataset):
-
-        super().__init__(
-            image_paths=[image_path for dataset in datasets for image_path in dataset.image_paths],
-            annotation_paths=[annotation_path for dataset in datasets for annotation_path in dataset.annotation_paths],
-            dataset_name="_".join(dataset.dataset_name for dataset in datasets),
-        )
diff --git a/common/research/common/datasets_v3/dataset_builder.py b/common/research/common/datasets_v3/dataset_builder.py
index bae20b9..fc9806b 100644
--- a/common/research/common/datasets_v3/dataset_builder.py
+++ b/common/research/common/datasets_v3/dataset_builder.py
@@ -23,6 +23,9 @@ class DatasetBuilder(Generic[ExampleT, TargetT], Iterable[Tuple[ExampleT, Target
     def __iter__(self) -> Iterator[Tuple[ExampleT, TargetT, str]]:
         return iter(self.build_lazy())
 
+    def __len__(self):
+        return len(self.dataset)
+
     def build(self) -> Dataset[ExampleT, TargetT]:
         assert not self._built
         self._built = True
diff --git a/common/research/common/datasets_v3/roco/roco_dataset_descriptor.py b/common/research/common/datasets_v3/roco/roco_dataset_descriptor.py
index 0904aad..f50925f 100644
--- a/common/research/common/datasets_v3/roco/roco_dataset_descriptor.py
+++ b/common/research/common/datasets_v3/roco/roco_dataset_descriptor.py
@@ -67,8 +67,7 @@ def make_markdown_dataset_report(dataset: LazyROCOFileDataset, report_dir: Path)
 
 
 if __name__ == "__main__":
-    dset = ROCODatasetsZoo.DJI.FINAL
     for datasets in ROCODatasetsZoo:
-        make_markdown_dataset_report(datasets.union(), datasets.datasets_dir())
+        make_markdown_dataset_report(datasets.union(), datasets.main_dir)
         for dset in datasets:
-            make_markdown_dataset_report(dset.lazy_files(), dset.main_dir)
+            make_markdown_dataset_report(dset.build_lazy(), dset.main_dir)
diff --git a/common/research/common/datasets_v3/roco/roco_datasets.py b/common/research/common/datasets_v3/roco/roco_datasets.py
index ff5fa10..993fd40 100644
--- a/common/research/common/datasets_v3/roco/roco_datasets.py
+++ b/common/research/common/datasets_v3/roco/roco_datasets.py
@@ -1,57 +1,50 @@
 from abc import abstractmethod
-from enum import Enum, EnumMeta
 from pathlib import Path
-from typing import Iterator
+from typing import ClassVar, Iterator, Set
 
-from polystar.common.utils.str_utils import snake2camel
 from research.common.datasets_v3.roco.roco_annotation import ROCOAnnotation
-from research.common.datasets_v3.roco.roco_dataset import (
-    LazyROCODataset,
-    LazyROCOFileDataset,
-    ROCODataset,
-    ROCOFileDataset,
-)
 from research.common.datasets_v3.roco.roco_dataset_builder import ROCODatasetBuilder
 from research.common.datasets_v3.union_dataset import UnionLazyDataset
 
 
-class ROCODatasets(Enum):
-    def __init__(self, dataset_dir_name: str, dataset_name: str = None):
-        self.dataset_name = dataset_name or snake2camel(self.name)
-        self._dataset_dir_name = dataset_dir_name
+class ROCODatasetsMeta(type):
+    def __init__(cls, name: str, bases, dct):
+        super().__init__(name, bases, dct)
+        cls.__ignore__: Set[str] = set(getattr(cls, "__ignore__", [])) | {"name", "keys", "union"}
+        cls.name = cls.__name__.replace("Datasets", "").replace("ROCO", "")
 
-    def lazy(self) -> LazyROCODataset:
-        return self.builder.to_images().build_lazy()
+    def __call__(cls, *args, **kwargs):  # calling a zoo class (i.e. instantiating it) is always an error
+        raise NotImplementedError("This class should not be instantiated")
 
-    def lazy_files(self) -> LazyROCOFileDataset:
-        return self.builder.build_lazy()
+    def __iter__(cls) -> Iterator[ROCODatasetBuilder]:
+        return (cls._make_builder_from_name(name) for name in dir(cls) if _is_builder_name(cls, name))
 
-    def dataset(self) -> ROCODataset:
-        return self.builder.to_images().build()
+    def union(cls) -> UnionLazyDataset[Path, ROCOAnnotation]:
+        return UnionLazyDataset(cls, cls.name)
+
+    def __getattribute__(cls, name: str):
+        if not _is_builder_name(cls, name):
+            return super().__getattribute__(name)
 
-    def files_dataset(self) -> ROCOFileDataset:
-        return self.builder.build()
+        return cls._make_builder_from_name(name)
 
-    @property
-    def main_dir(self):
-        return self.datasets_dir() / self._dataset_dir_name
+    def _make_builder_from_name(cls, name: str) -> ROCODatasetBuilder:
+        args = super().__getattribute__(name)
+        if not isinstance(args, tuple):
+            args = (args,)
 
-    @property
-    def builder(self) -> ROCODatasetBuilder:
-        return ROCODatasetBuilder(self.main_dir, self.dataset_name)
+        return cls._make_builder_from_args(name, *args)
 
-    @classmethod
     @abstractmethod
-    def datasets_dir(cls) -> Path:  # Fixme: in python 37, we can define a class var using the _ignore_ attribute
+    def _make_builder_from_args(cls, name: str, *args) -> ROCODatasetBuilder:
         pass
 
-    @classmethod
-    def __iter__(cls) -> Iterator["ROCODatasets"]:  # needed for pycharm typing, dont know why
-        return EnumMeta.__iter__(cls)
 
-    @classmethod
-    def union(cls) -> UnionLazyDataset[Path, ROCOAnnotation]:
-        return UnionLazyDataset((d.lazy_files() for d in cls), cls.datasets_name)
+def _is_builder_name(cls: ROCODatasetsMeta, name: str) -> bool:
+    return not name.startswith("_") and name not in cls.__ignore__
+
+
+class ROCODatasets(metaclass=ROCODatasetsMeta):
+    main_dir: ClassVar[Path]
 
-    def __init_subclass__(cls, **kwargs):
-        cls.datasets_name = cls.__name__.replace("Datasets", "").replace("ROCO", "")
+    __ignore__ = {"main_dir"}
diff --git a/common/research/common/datasets_v3/roco/zoo/dji.py b/common/research/common/datasets_v3/roco/zoo/dji.py
index 966eb19..ac5af1c 100644
--- a/common/research/common/datasets_v3/roco/zoo/dji.py
+++ b/common/research/common/datasets_v3/roco/zoo/dji.py
@@ -1,15 +1,16 @@
-from pathlib import Path
-
 from research.common.constants import DJI_ROCO_DSET_DIR
+from research.common.datasets_v3.roco.roco_dataset_builder import ROCODatasetBuilder
 from research.common.datasets_v3.roco.roco_datasets import ROCODatasets
 
 
 class DJIROCODatasets(ROCODatasets):
-    CENTRAL_CHINA = "robomaster_Central China Regional Competition"
-    NORTH_CHINA = "robomaster_North China Regional Competition"
-    SOUTH_CHINA = "robomaster_South China Regional Competition"
-    FINAL = "robomaster_Final Tournament"
+    main_dir = DJI_ROCO_DSET_DIR
+
+    CENTRAL_CHINA: ROCODatasetBuilder = "robomaster_Central China Regional Competition"
+    NORTH_CHINA: ROCODatasetBuilder = "robomaster_North China Regional Competition"
+    SOUTH_CHINA: ROCODatasetBuilder = "robomaster_South China Regional Competition"
+    FINAL: ROCODatasetBuilder = "robomaster_Final Tournament"
 
     @classmethod
-    def datasets_dir(cls) -> Path:
-        return DJI_ROCO_DSET_DIR
+    def _make_builder_from_args(cls, name: str, competition_name: str) -> ROCODatasetBuilder:
+        return ROCODatasetBuilder(cls.main_dir / competition_name, name)
diff --git a/common/research/common/datasets_v3/roco/zoo/dji_zoomed.py b/common/research/common/datasets_v3/roco/zoo/dji_zoomed.py
index 2e185d3..4d10b33 100644
--- a/common/research/common/datasets_v3/roco/zoo/dji_zoomed.py
+++ b/common/research/common/datasets_v3/roco/zoo/dji_zoomed.py
@@ -1,20 +1,17 @@
-from enum import auto
-from pathlib import Path
-
 from polystar.common.utils.str_utils import snake2camel
 from research.common.constants import DJI_ROCO_ZOOMED_DSET_DIR
+from research.common.datasets_v3.roco.roco_dataset_builder import ROCODatasetBuilder
 from research.common.datasets_v3.roco.roco_datasets import ROCODatasets
 
 
 class DJIROCOZoomedDatasets(ROCODatasets):
-    def __init__(self, _):
-        super().__init__(self.name.lower(), f"{snake2camel(self.name)}ZoomedV2")
+    main_dir = DJI_ROCO_ZOOMED_DSET_DIR
 
-    CENTRAL_CHINA = auto()
-    NORTH_CHINA = auto()
-    SOUTH_CHINA = auto()
-    FINAL = auto()
+    CENTRAL_CHINA: ROCODatasetBuilder = ()
+    NORTH_CHINA: ROCODatasetBuilder = ()
+    SOUTH_CHINA: ROCODatasetBuilder = ()
+    FINAL: ROCODatasetBuilder = ()
 
     @classmethod
-    def datasets_dir(cls) -> Path:
-        return DJI_ROCO_ZOOMED_DSET_DIR
+    def _make_builder_from_args(cls, name: str) -> ROCODatasetBuilder:
+        return ROCODatasetBuilder(cls.main_dir / name.lower(), f"{snake2camel(name)}Zoomed")
diff --git a/common/research/common/datasets_v3/roco/zoo/twitch.py b/common/research/common/datasets_v3/roco/zoo/twitch.py
index 013d7e4..7cfecb3 100644
--- a/common/research/common/datasets_v3/roco/zoo/twitch.py
+++ b/common/research/common/datasets_v3/roco/zoo/twitch.py
@@ -1,31 +1,21 @@
-from enum import auto
-from pathlib import Path
-
 from research.common.constants import TWITCH_DSET_DIR
+from research.common.datasets_v3.roco.roco_dataset_builder import ROCODatasetBuilder
 from research.common.datasets_v3.roco.roco_datasets import ROCODatasets
 
 
 class TwitchROCODatasets(ROCODatasets):
-    def __init__(self, _):
-        super().__init__(self.twitch_id)
+    main_dir = TWITCH_DSET_DIR / "v1"
 
-    T470149568 = auto()
-    T470150052 = auto()
-    T470151286 = auto()
-    T470152289 = auto()
-    T470152730 = auto()
-    T470152838 = auto()
-    T470153081 = auto()
-    T470158483 = auto()
+    T470149568: ROCODatasetBuilder = ()
+    T470150052: ROCODatasetBuilder = ()
+    T470151286: ROCODatasetBuilder = ()
+    T470152289: ROCODatasetBuilder = ()
+    T470152730: ROCODatasetBuilder = ()
+    T470152838: ROCODatasetBuilder = ()
+    T470153081: ROCODatasetBuilder = ()
+    T470158483: ROCODatasetBuilder = ()
 
     @classmethod
-    def datasets_dir(cls) -> Path:
-        return TWITCH_DSET_DIR / "v1"
-
-    @property
-    def twitch_id(self) -> str:
-        return self.name[len("T") :]
-
-    @property
-    def video_url(self) -> str:
-        return f"https://www.twitch.tv/videos/{self.twitch_id}"
+    def _make_builder_from_args(cls, name: str) -> ROCODatasetBuilder:
+        twitch_id = name[1:]
+        return ROCODatasetBuilder(cls.main_dir / twitch_id, name)
diff --git a/common/research/common/scripts/create_tensorflow_records.py b/common/research/common/scripts/create_tensorflow_records.py
index 768b1c2..9456688 100644
--- a/common/research/common/scripts/create_tensorflow_records.py
+++ b/common/research/common/scripts/create_tensorflow_records.py
@@ -31,7 +31,7 @@ def create_dji_records():
     TensorflowRecordFactory.from_datasets(
         [DJIROCODatasets.CENTRAL_CHINA, DJIROCODatasets.NORTH_CHINA, DJIROCODatasets.SOUTH_CHINA], "DJI_Train_"
     )
-    TensorflowRecordFactory.from_dataset(DJIROCODatasets.Final, "DJI_Test_")
+    TensorflowRecordFactory.from_dataset(DJIROCODatasets.FINAL, "DJI_Test_")
 
 
 def create_dji_zoomed_records():
diff --git a/common/research/common/scripts/improve_roco_by_zooming.py b/common/research/common/scripts/improve_roco_by_zooming.py
index 5547422..eee88b9 100644
--- a/common/research/common/scripts/improve_roco_by_zooming.py
+++ b/common/research/common/scripts/improve_roco_by_zooming.py
@@ -28,7 +28,9 @@ def improve_dji_roco_dataset_by_zooming_and_perturbating(
 
 def improve_all_dji_datasets_by_zooming_and_perturbating(zoomer: Zoomer, perturbator: ImagePerturbator):
     for _dset in ROCODatasetsZoo.DJI:
-        improve_dji_roco_dataset_by_zooming_and_perturbating(zoomer=zoomer, dset=_dset.lazy(), perturbator=perturbator)
+        improve_dji_roco_dataset_by_zooming_and_perturbating(
+            zoomer=zoomer, dset=_dset.to_images().build_lazy(), perturbator=perturbator
+        )
 
 
 def _prepare_empty_zoomed_dir(dir_path: Path) -> Tuple[Path, Path]:
diff --git a/common/research/common/scripts/visualize_dataset.py b/common/research/common/scripts/visualize_dataset.py
index a74f2e4..636ddd5 100644
--- a/common/research/common/scripts/visualize_dataset.py
+++ b/common/research/common/scripts/visualize_dataset.py
@@ -14,4 +14,4 @@ def visualize_dataset(dataset: LazyROCODataset, n_images: int):
 
 
 if __name__ == "__main__":
-    visualize_dataset(ROCODatasetsZoo.DJI_ZOOMED.CENTRAL_CHINA.lazy(), 20)
+    visualize_dataset(ROCODatasetsZoo.DJI_ZOOMED.CENTRAL_CHINA.to_images().build_lazy(), 20)
diff --git a/robots-at-robots/research/robots_at_robots/armor_color/armor_color_dataset.py b/robots-at-robots/research/robots_at_robots/armor_color/armor_color_dataset.py
index 052232d..2aff0a8 100644
--- a/robots-at-robots/research/robots_at_robots/armor_color/armor_color_dataset.py
+++ b/robots-at-robots/research/robots_at_robots/armor_color/armor_color_dataset.py
@@ -19,7 +19,7 @@ def make_armor_color_dataset_generator() -> ArmorValueDatasetGenerator[str]:
 
 
 if __name__ == "__main__":
-    _roco_dataset_builder = ROCODatasetsZoo.DJI.CENTRAL_CHINA.builder
+    _roco_dataset_builder = ROCODatasetsZoo.DJI.CENTRAL_CHINA
     _armor_color_dataset = make_armor_color_dataset_generator().from_roco_dataset(_roco_dataset_builder)
 
     for p, c, _name in islice(_armor_color_dataset, 20, 25):
diff --git a/robots-at-robots/research/robots_at_robots/armor_color/baseline_experiments.py b/robots-at-robots/research/robots_at_robots/armor_color/baseline_experiments.py
index e84a24c..06f6354 100644
--- a/robots-at-robots/research/robots_at_robots/armor_color/baseline_experiments.py
+++ b/robots-at-robots/research/robots_at_robots/armor_color/baseline_experiments.py
@@ -13,8 +13,8 @@ if __name__ == "__main__":
     logging.getLogger().setLevel("INFO")
 
     reporter = ArmorColorPipelineReporterFactory.from_roco_datasets(
-        train_roco_datasets=[ROCODatasetsZoo.TWITCH.T470151286.builder, ROCODatasetsZoo.TWITCH.T470150052.builder],
-        test_roco_datasets=[ROCODatasetsZoo.TWITCH.T470152289.builder],
+        train_roco_datasets=[ROCODatasetsZoo.TWITCH.T470151286, ROCODatasetsZoo.TWITCH.T470150052],
+        test_roco_datasets=[ROCODatasetsZoo.TWITCH.T470152289],
     )
 
     red_blue_comparison_pipeline = ClassifierImagePipeline(
diff --git a/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_dataset.py b/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_dataset.py
index 9eb589c..38b9e63 100644
--- a/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_dataset.py
+++ b/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_dataset.py
@@ -21,7 +21,7 @@ def make_armor_digit_dataset_generator(acceptable_digits: Iterable[int]) -> Armo
 
 
 if __name__ == "__main__":
-    _roco_dataset_builder = ROCODatasetsZoo.DJI.CENTRAL_CHINA.builder
+    _roco_dataset_builder = ROCODatasetsZoo.DJI.CENTRAL_CHINA
     _armor_digit_dataset = make_armor_digit_dataset_generator([1, 2]).from_roco_dataset(_roco_dataset_builder)
 
     for p, c, _name in islice(_armor_digit_dataset, 20, 30):
diff --git a/robots-at-robots/research/robots_at_robots/armor_digit/baseline_experiments.py b/robots-at-robots/research/robots_at_robots/armor_digit/baseline_experiments.py
index 74ed551..9925a7b 100644
--- a/robots-at-robots/research/robots_at_robots/armor_digit/baseline_experiments.py
+++ b/robots-at-robots/research/robots_at_robots/armor_digit/baseline_experiments.py
@@ -11,8 +11,8 @@ if __name__ == "__main__":
     logging.getLogger().setLevel("INFO")
 
     reporter = ArmorDigitPipelineReporterFactory.from_roco_datasets(
-        train_roco_datasets=[ROCODatasetsZoo.TWITCH.T470151286.builder, ROCODatasetsZoo.TWITCH.T470150052.builder],
-        test_roco_datasets=[ROCODatasetsZoo.TWITCH.T470152289.builder],
+        train_roco_datasets=[ROCODatasetsZoo.TWITCH.T470151286, ROCODatasetsZoo.TWITCH.T470150052],
+        test_roco_datasets=[ROCODatasetsZoo.TWITCH.T470152289],
     )
 
     random_pipeline = ClassifierImagePipeline(model=RandomModel(), custom_name="random")
diff --git a/robots-at-robots/research/robots_at_robots/dataset/armor_dataset_factory.py b/robots-at-robots/research/robots_at_robots/dataset/armor_dataset_factory.py
index 5631ae1..7b98b40 100644
--- a/robots-at-robots/research/robots_at_robots/dataset/armor_dataset_factory.py
+++ b/robots-at-robots/research/robots_at_robots/dataset/armor_dataset_factory.py
@@ -31,7 +31,7 @@ class ArmorDataset(LazyDataset[Image, Armor]):
 
 
 if __name__ == "__main__":
-    for _armor_img, _armor, _name in islice(ArmorDataset(ROCODatasetsZoo.DJI.CENTRAL_CHINA.lazy()), 20, 30):
+    for _armor_img, _armor, _name in islice(ArmorDataset(ROCODatasetsZoo.DJI.CENTRAL_CHINA.to_images()), 20, 30):
         print(_name, repr(_armor))
         plt.imshow(_armor_img)
         plt.show()
diff --git a/robots-at-robots/research/robots_at_robots/demos/demo_infer.py b/robots-at-robots/research/robots_at_robots/demos/demo_infer.py
index 56c9dad..c5d8edd 100644
--- a/robots-at-robots/research/robots_at_robots/demos/demo_infer.py
+++ b/robots-at-robots/research/robots_at_robots/demos/demo_infer.py
@@ -16,7 +16,7 @@ if __name__ == "__main__":
     filters = [ConfidenceObjectValidator(confidence_threshold=0.5)]
 
     with PltResultViewer("Demo of tf model") as viewer:
-        for image, _, _ in ROCODatasetsZoo.DJI.CENTRAL_CHINA.builder.to_images().cap(5):
+        for image, _, _ in ROCODatasetsZoo.DJI.CENTRAL_CHINA.to_images().cap(5):
             objects = objects_detector.detect(image)
             for f in filters:
                 objects = f.filter(objects, image)
diff --git a/robots-at-robots/research/robots_at_robots/demos/demo_pipeline.py b/robots-at-robots/research/robots_at_robots/demos/demo_pipeline.py
index a049f39..9149086 100644
--- a/robots-at-robots/research/robots_at_robots/demos/demo_pipeline.py
+++ b/robots-at-robots/research/robots_at_robots/demos/demo_pipeline.py
@@ -47,8 +47,8 @@ if __name__ == "__main__":
     )
 
     with PltResultViewer("Demo of tf model") as viewer:
-        for dset in (ROCODatasetsZoo.TWITCH.T470150052, ROCODatasetsZoo.DJI.CENTRAL_CHINA):
-            for image_path, _, _ in dset.builder.cap(5):
+        for builder in (ROCODatasetsZoo.TWITCH.T470150052, ROCODatasetsZoo.DJI.CENTRAL_CHINA):
+            for image_path, _, _ in builder.cap(5):
                 try:
                     image = cv2.cvtColor(cv2.imread(str(image_path)), cv2.COLOR_BGR2RGB)
                     target = pipeline.predict_target(image)
-- 
GitLab