From c056c578790b5a69668797afea750916a3d853db Mon Sep 17 00:00:00 2001
From: Mathieu Beligon <mathieu@feedly.com>
Date: Tue, 6 Apr 2021 13:20:44 -0400
Subject: [PATCH] [AIR] Create AIR datasets
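
Introduce AIR ("Armors In Robots") datasets: ROCO images are cropped
around each robot and annotated with the armors it contains, for use in
the roco_detection experiments.

- Generalize ArmorValueDatasetCache into a reusable DatasetCache that
  regenerates its directory when missing or outdated (the gcloud
  download path is dropped; AWS support is left as a TODO).
- Add AIRDataset / AIRDatasetCache and ROCODatasetBuilder.to_air(),
  which builds the AIR cache under DSET_DIR / "air" and returns a
  builder over it.
- Add ObservableDataset with the matching DatasetBuilder
  apply_to_examples / apply_to_targets hooks.
- Add the ARMORS_FILTER and SMALL_BASE_FILTER object filters, and drop
  objects with empty boxes when parsing ROCO annotations.
- Rename TensorflowRecordFactory to ROCOTensorflowRecordFactory
  (records are now written under a per-factory RECORDS_DIR) and add
  AirTensorflowRecordFactory plus scripts to create the AIR datasets
  and their tensorflow records.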

---
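Below the cut, a minimal usage sketch of the new API (reviewer note, not applied by git am); it mirrors the scripts added in this patch and assumes the Twitch ROCO datasets referenced by ROCODatasetsZoo are available locally:

    from research.common.datasets.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo
    from research.roco_detection.scripts.create_air_tensorflow_records import AirTensorflowRecordFactory

    # Iterate over the cached AIR crops of a single Twitch dataset
    # (to_air() generates the cache on first use).
    for image, annotation, name in ROCODatasetsZoo.TWITCH.T470149066.to_air().to_images().cap(5):
        print(name, len(annotation.objects))

    # Write AIR tensorflow records for the Twitch validation datasets.
    AirTensorflowRecordFactory().from_builders(ROCODatasetsZoo.TWITCH_VALIDATION_DATASETS, "Twitch2_Val")
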
 .../objects_filters/type_object_filter.py     |  3 +
 src/polystar/view/results_viewer_abc.py       |  4 --
 .../dataset/armor_value_dataset_cache.py      | 69 +++++++------------
 .../dataset/armor_value_dataset_generator.py  | 11 ++-
 .../common/datasets/dataset_builder.py        |  9 +++
 .../common/datasets/observable_dataset.py     | 26 +++++++
 .../common/datasets/roco/air_dataset.py       | 37 ++++++++++
 .../common/datasets/roco/roco_annotation.py   |  6 ++
 .../datasets/roco/roco_dataset_builder.py     |  9 +++
 .../datasets/roco/roco_objects_dataset.py     |  4 +-
 .../common/datasets/transform_dataset.py      |  2 +-
 src/research/dataset/improvement/zoom.py      |  3 +-
 src/research/dataset/tensorflow_record.py     |  8 ++-
 src/research/roco_detection/__init__.py       |  0
 src/research/roco_detection/robots_dataset.py | 37 ++++++++++
 .../scripts/create_air_datasets.py            |  7 ++
 .../scripts/create_air_tensorflow_records.py  | 22 ++++++
 .../roco_detection/small_base_filter.py       |  6 ++
 18 files changed, 203 insertions(+), 60 deletions(-)
 create mode 100644 src/research/common/datasets/observable_dataset.py
 create mode 100644 src/research/common/datasets/roco/air_dataset.py
 create mode 100644 src/research/roco_detection/__init__.py
 create mode 100644 src/research/roco_detection/robots_dataset.py
 create mode 100644 src/research/roco_detection/scripts/create_air_datasets.py
 create mode 100644 src/research/roco_detection/scripts/create_air_tensorflow_records.py
 create mode 100644 src/research/roco_detection/small_base_filter.py

diff --git a/src/polystar/target_pipeline/objects_filters/type_object_filter.py b/src/polystar/target_pipeline/objects_filters/type_object_filter.py
index b8d28e4..3fe846e 100644
--- a/src/polystar/target_pipeline/objects_filters/type_object_filter.py
+++ b/src/polystar/target_pipeline/objects_filters/type_object_filter.py
@@ -12,3 +12,6 @@ class TypeObjectsFilter(ObjectsFilterABC):
 
     def validate_single(self, obj: ROCOObject) -> bool:
         return obj.type in self.desired_types
+
+
+ARMORS_FILTER: ObjectsFilterABC = -TypeObjectsFilter({ObjectType.ARMOR})
diff --git a/src/polystar/view/results_viewer_abc.py b/src/polystar/view/results_viewer_abc.py
index ea78e95..7cfb04a 100644
--- a/src/polystar/view/results_viewer_abc.py
+++ b/src/polystar/view/results_viewer_abc.py
@@ -3,7 +3,6 @@ from itertools import cycle
 from typing import Iterable, Sequence, Tuple
 
 from polystar.models.image import Image
-from polystar.models.roco_image_annotation import ROCOImageAnnotation
 from polystar.models.roco_object import ROCOObject
 from polystar.target_pipeline.debug_pipeline import DebugInfo
 from polystar.target_pipeline.detected_objects.detected_robot import DetectedRobot, FakeDetectedRobot
@@ -55,9 +54,6 @@ class ResultViewerABC(ABC):
         self.add_objects(objects)
         self.display()
 
-    def display_image_annotation(self, annotation: ROCOImageAnnotation):
-        self.display_image_with_objects(annotation.image, annotation.objects)
-
     def display_debug_info(self, debug_info: DebugInfo):
         self.add_debug_info(debug_info)
         self.display()
diff --git a/src/research/armors/dataset/armor_value_dataset_cache.py b/src/research/armors/dataset/armor_value_dataset_cache.py
index cab852c..d03352f 100644
--- a/src/research/armors/dataset/armor_value_dataset_cache.py
+++ b/src/research/armors/dataset/armor_value_dataset_cache.py
@@ -1,66 +1,33 @@
 import json
 from pathlib import Path
 from shutil import rmtree
-from typing import ClassVar, Generic, Optional
-
-from google.cloud.exceptions import Forbidden
+from typing import ClassVar, Generic, Optional, TypeVar
 
 from polystar.models.image import Image, save_image
 from polystar.utils.time import create_time_id
 from polystar.utils.tqdm import smart_tqdm
-from research.armors.dataset.armor_value_target_factory import ArmorValueTargetFactory
-from research.common.datasets.lazy_dataset import LazyDataset, TargetT
-from research.common.datasets.roco.roco_dataset_builder import ROCODatasetBuilder
-from research.common.gcloud.gcloud_storage import GCStorages
+from research.common.datasets.lazy_dataset import LazyDataset
+
+T = TypeVar("T")
 
 
-class ArmorValueDatasetCache(Generic[TargetT]):
+# TODO: add AWS support here
+class DatasetCache(Generic[T]):
     VERSION: ClassVar[str] = "2.0"
 
-    def __init__(
-        self,
-        roco_dataset_builder: ROCODatasetBuilder,
-        cache_dir: Path,
-        dataset_name: str,
-        target_factory: ArmorValueTargetFactory[TargetT],
-    ):
-        self.target_factory = target_factory
-        self.dataset_name = dataset_name
+    def __init__(self, cache_dir: Path, dataset: LazyDataset[Image, T]):
+        self.dataset = dataset
         self.cache_dir = cache_dir
-        self.roco_dataset_builder = roco_dataset_builder
         self.lock_file = cache_dir / ".lock"
 
-        self.cache_dir.mkdir(parents=True, exist_ok=True)
+        self.cache_dir.mkdir(exist_ok=True, parents=True)
 
-    def generate_or_download_if_needed(self):
+    def generate_if_missing(self):
         cause = self._get_generation_cause()
         if cause is None:
             return
         self._clean_cache_dir()
-        try:
-            GCStorages.DEV.download_directory(self.cache_dir)
-            cause = self._get_generation_cause()
-            if cause is None:
-                return
-            self._clean_cache_dir()
-        except FileNotFoundError:
-            cause += " and not on gcloud"
-        except Forbidden:
-            pass
-        self.save(self._generate(), cause)
-
-    def _clean_cache_dir(self):
-        rmtree(self.cache_dir, ignore_errors=True)
-        self.cache_dir.mkdir()
-
-    def save(self, dataset: LazyDataset[Image, TargetT], cause: str):
-        desc = f"Generating dataset {self.dataset_name} (cause: {cause})"
-        for img, target, name in smart_tqdm(dataset, desc=desc, unit="img"):
-            save_image(img, self.cache_dir / f"{name}-{str(target)}.jpg")
-        self.lock_file.write_text(json.dumps({"version": self.VERSION, "date": create_time_id()}))
-
-    def _generate(self) -> LazyDataset[Image, TargetT]:
-        return self.roco_dataset_builder.to_armors().transform_targets(self.target_factory.from_armor).build_lazy()
+        self._generate(cause)
 
     def _get_generation_cause(self) -> Optional[str]:
         if not self.lock_file.exists():
@@ -68,3 +35,17 @@ class ArmorValueDatasetCache(Generic[TargetT]):
         version = json.loads(self.lock_file.read_text())["version"]
         if version != self.VERSION:
             return f"upgrade [{version} -> {self.VERSION}]"
+
+    def _clean_cache_dir(self):
+        rmtree(self.cache_dir, ignore_errors=True)
+        self.cache_dir.mkdir()
+
+    def _generate(self, cause: str):
+        desc = f"Generating dataset {self.dataset.name} (cause: {cause})"
+        for img, target, name in smart_tqdm(self.dataset, desc=desc, unit="img"):
+            self._save_one(img, target, name)
+
+        self.lock_file.write_text(json.dumps({"version": self.VERSION, "date": create_time_id()}))
+
+    def _save_one(self, img: Image, target: T, name: str):
+        save_image(img, self.cache_dir / f"{name}-{str(target)}.jpg")
diff --git a/src/research/armors/dataset/armor_value_dataset_generator.py b/src/research/armors/dataset/armor_value_dataset_generator.py
index 8f9ed9d..24f0d89 100644
--- a/src/research/armors/dataset/armor_value_dataset_generator.py
+++ b/src/research/armors/dataset/armor_value_dataset_generator.py
@@ -5,7 +5,7 @@ from polystar.filters.exclude_filter import ExcludeFilter
 from polystar.filters.filter_abc import FilterABC
 from polystar.filters.pass_through_filter import PassThroughFilter
 from polystar.models.image import FileImage
-from research.armors.dataset.armor_value_dataset_cache import ArmorValueDatasetCache
+from research.armors.dataset.armor_value_dataset_cache import DatasetCache
 from research.armors.dataset.armor_value_target_factory import ArmorValueTargetFactory
 from research.common.datasets.dataset import Dataset
 from research.common.datasets.image_file_dataset_builder import DirectoryDatasetBuilder
@@ -58,14 +58,13 @@ class ArmorValueDatasetGenerator(Generic[TargetT]):
 
     def from_roco_dataset(self, roco_dataset_builder: ROCODatasetBuilder) -> DirectoryDatasetBuilder[TargetT]:
         cache_dir = roco_dataset_builder.main_dir / self.task_name
-        dataset_name = roco_dataset_builder.name
 
-        ArmorValueDatasetCache(
-            roco_dataset_builder, cache_dir, dataset_name, self.target_factory
-        ).generate_or_download_if_needed()
+        DatasetCache(
+            cache_dir, roco_dataset_builder.to_armors().transform_targets(self.target_factory.from_armor).build_lazy()
+        ).generate_if_missing()
 
         return (
-            DirectoryDatasetBuilder(cache_dir, self.target_factory.from_file, dataset_name)
+            DirectoryDatasetBuilder(cache_dir, self.target_factory.from_file, roco_dataset_builder.name)
             .filter_targets(self.targets_filter)
             .filter_examples(ExcludeFilesFilter(DatasetChanges(cache_dir).invalidated))
         )
diff --git a/src/research/common/datasets/dataset_builder.py b/src/research/common/datasets/dataset_builder.py
index d96cebe..9018430 100644
--- a/src/research/common/datasets/dataset_builder.py
+++ b/src/research/common/datasets/dataset_builder.py
@@ -6,6 +6,7 @@ from polystar.utils.misc import identity
 from research.common.datasets.dataset import Dataset
 from research.common.datasets.filter_dataset import ExampleU, FilterDataset, TargetU
 from research.common.datasets.lazy_dataset import ExampleT, LazyDataset, TargetT
+from research.common.datasets.observable_dataset import ObservableDataset
 from research.common.datasets.shuffle_dataset import ShuffleDataset
 from research.common.datasets.slice_dataset import SliceDataset
 from research.common.datasets.transform_dataset import TransformDataset
@@ -60,6 +61,14 @@ class DatasetBuilder(Generic[ExampleT, TargetT], Iterable[Tuple[ExampleT, Target
         self.dataset = TransformDataset(self.dataset, identity, target_transformer)
         return self
 
+    def apply_to_examples(self, example_observable: Callable[[ExampleT], None]) -> "DatasetBuilder[ExampleU, TargetT]":
+        self.dataset = ObservableDataset(self.dataset, example_observable, identity)
+        return self
+
+    def apply_to_targets(self, target_observable: Callable[[TargetT], None]) -> "DatasetBuilder[ExampleT, TargetU]":
+        self.dataset = ObservableDataset(self.dataset, identity, target_observable)
+        return self
+
     def shuffle(self) -> "DatasetBuilder[ExampleT, TargetU]":
         self.dataset = ShuffleDataset(self.dataset)
         return self
diff --git a/src/research/common/datasets/observable_dataset.py b/src/research/common/datasets/observable_dataset.py
new file mode 100644
index 0000000..4378280
--- /dev/null
+++ b/src/research/common/datasets/observable_dataset.py
@@ -0,0 +1,26 @@
+from typing import Callable, Iterator, Tuple
+
+from research.common.datasets.filter_dataset import ExampleU, TargetU
+from research.common.datasets.lazy_dataset import ExampleT, LazyDataset, TargetT
+
+
+class ObservableDataset(LazyDataset[ExampleT, TargetT]):
+    def __init__(
+        self,
+        source: LazyDataset[ExampleT, TargetT],
+        example_observable: Callable[[ExampleT], None],
+        target_observable: Callable[[TargetT], None],
+    ):
+        self.target_observable = target_observable
+        self.example_observable = example_observable
+        self.source = source
+        super().__init__(source.name)
+
+    def __iter__(self) -> Iterator[Tuple[ExampleU, TargetU, str]]:
+        for example, target, name in self.source:
+            self.example_observable(example)
+            self.target_observable(target)
+            yield example, target, name
+
+    def __len__(self):
+        return len(self.source)
diff --git a/src/research/common/datasets/roco/air_dataset.py b/src/research/common/datasets/roco/air_dataset.py
new file mode 100644
index 0000000..f3ffa6d
--- /dev/null
+++ b/src/research/common/datasets/roco/air_dataset.py
@@ -0,0 +1,37 @@
+from typing import Iterator, Tuple
+
+from polystar.models.image import Image
+from polystar.target_pipeline.objects_filters.objects_filter_abc import ObjectsFilterABC
+from polystar.target_pipeline.objects_filters.type_object_filter import ARMORS_FILTER
+from research.armors.dataset.armor_value_dataset_cache import DatasetCache
+from research.common.datasets.lazy_dataset import LazyDataset
+from research.common.datasets.roco.roco_annotation import ROCOAnnotation
+from research.common.datasets.roco.roco_dataset import LazyROCODataset
+from research.dataset.improvement.zoom import crop_image_annotation
+
+
+class AIRDataset(LazyDataset[Image, ROCOAnnotation]):
+    """Armors In Robots"""
+
+    def __init__(self, roco_dataset: LazyROCODataset, robots_filter: ObjectsFilterABC):
+        super().__init__(roco_dataset.name + "_AIR")
+        self.robots_filter = robots_filter
+        self.roco_dataset = roco_dataset
+
+    def __iter__(self) -> Iterator[Tuple[Image, ROCOAnnotation, str]]:
+        for image, annotation, name in self.roco_dataset:
+            yield from self._generate_from_single(image, annotation, name)
+
+    def _generate_from_single(
+        self, image: Image, annotation: ROCOAnnotation, name: str
+    ) -> Iterator[Tuple[Image, ROCOAnnotation, str]]:
+        annotation.objects, robots = ARMORS_FILTER.split(annotation.objects)
+        for i, robot in enumerate(self.robots_filter.filter(robots)):
+            yield crop_image_annotation(
+                image, annotation, robot.box, min_coverage=0.75, name=f"{name}-{i}-{robot.type.name.lower()}"
+            )
+
+
+class AIRDatasetCache(DatasetCache[ROCOAnnotation]):
+    def _save_one(self, img: Image, annotation: ROCOAnnotation, name: str):
+        annotation.save_with_image(self.cache_dir, img, name)
diff --git a/src/research/common/datasets/roco/roco_annotation.py b/src/research/common/datasets/roco/roco_annotation.py
index 496acd1..5d4a0a2 100644
--- a/src/research/common/datasets/roco/roco_annotation.py
+++ b/src/research/common/datasets/roco/roco_annotation.py
@@ -7,6 +7,7 @@ from xml.dom.minidom import parseString
 import xmltodict
 from dicttoxml import dicttoxml
 
+from polystar.models.image import Image, save_image
 from polystar.models.roco_object import ROCOObject, ROCOObjectFactory
 from polystar.utils.path import move_file
 
@@ -39,6 +40,7 @@ class ROCOAnnotation:
         objects = [
             ROCOObjectFactory(image_w=image_w, image_h=image_h).from_json(obj_json) for obj_json in roco_json_objects
         ]
+        objects = [obj for obj in objects if obj.box.w > 0 and obj.box.h > 0]
 
         return ROCOAnnotation(
             objects=objects, has_rune=len(roco_json_objects) != len(json_objects), w=image_w, h=image_h,
@@ -48,6 +50,10 @@ class ROCOAnnotation:
         directory.mkdir(exist_ok=True, parents=True)
         self.save_in_file((directory / name).with_suffix(".xml"))
 
+    def save_with_image(self, directory: Path, image: Image, name: str):
+        self.save_in_directory(directory / "image_annotation", name)
+        save_image(image, (directory / "image" / name).with_suffix(".jpg"))
+
     def save_in_file(self, file: Path):
         file.write_text(self.to_xml())
 
diff --git a/src/research/common/datasets/roco/roco_dataset_builder.py b/src/research/common/datasets/roco/roco_dataset_builder.py
index cdeabc8..af9eb3c 100644
--- a/src/research/common/datasets/roco/roco_dataset_builder.py
+++ b/src/research/common/datasets/roco/roco_dataset_builder.py
@@ -3,10 +3,13 @@ from pathlib import Path
 from polystar.models.image import Image
 from polystar.models.roco_object import Armor, ObjectType, ROCOObject
 from polystar.target_pipeline.objects_filters.type_object_filter import TypeObjectsFilter
+from research.common.constants import DSET_DIR
 from research.common.datasets.dataset_builder import DatasetBuilder
 from research.common.datasets.image_file_dataset_builder import DirectoryDatasetBuilder
+from research.common.datasets.roco.air_dataset import AIRDataset, AIRDatasetCache
 from research.common.datasets.roco.roco_annotation import ROCOAnnotation
 from research.common.datasets.roco.roco_objects_dataset import ROCOObjectsDataset
+from research.roco_detection.small_base_filter import SMALL_BASE_FILTER
 
 
 class ROCODatasetBuilder(DirectoryDatasetBuilder[ROCOAnnotation]):
@@ -23,5 +26,11 @@ class ROCODatasetBuilder(DirectoryDatasetBuilder[ROCOAnnotation]):
         builder.name = builder.name.replace("objects", "armors")
         return builder
 
+    # FIXME: it makes no sense to have a ROCODatasetBuilder as output
+    def to_air(self) -> "ROCODatasetBuilder":
+        cache_dir = DSET_DIR / "air" / self.main_dir.relative_to(DSET_DIR)
+        AIRDatasetCache(cache_dir, AIRDataset(self.to_images(), SMALL_BASE_FILTER)).generate_if_missing()
+        return ROCODatasetBuilder(cache_dir, self.name + "_AIR")
+
     def _roco_annotation_from_image_file(self, image_file: Path) -> ROCOAnnotation:
         return ROCOAnnotation.from_xml_file(self.annotations_dir / f"{image_file.stem}.xml")
diff --git a/src/research/common/datasets/roco/roco_objects_dataset.py b/src/research/common/datasets/roco/roco_objects_dataset.py
index 9cece54..8cf466d 100644
--- a/src/research/common/datasets/roco/roco_objects_dataset.py
+++ b/src/research/common/datasets/roco/roco_objects_dataset.py
@@ -17,7 +17,9 @@ class ROCOObjectsDataset(LazyDataset[Image, ROCOObject]):
             yield from self._generate_from_single(image, annotation, name)
 
     @staticmethod
-    def _generate_from_single(image: Image, annotation: ROCOAnnotation, name) -> Iterator[Tuple[Image, Armor, str]]:
+    def _generate_from_single(
+        image: Image, annotation: ROCOAnnotation, name: str
+    ) -> Iterator[Tuple[Image, Armor, str]]:
         for i, obj in enumerate(annotation.objects):
             croped_img = image[obj.box.y1 : obj.box.y2, obj.box.x1 : obj.box.x2]
             yield croped_img, obj, f"{name}-{i}"
diff --git a/src/research/common/datasets/transform_dataset.py b/src/research/common/datasets/transform_dataset.py
index 54251be..eed420e 100644
--- a/src/research/common/datasets/transform_dataset.py
+++ b/src/research/common/datasets/transform_dataset.py
@@ -1,6 +1,6 @@
 from typing import Callable, Iterator, Tuple
 
-from research.common.datasets.dataset_builder import ExampleU, TargetU
+from research.common.datasets.filter_dataset import ExampleU, TargetU
 from research.common.datasets.lazy_dataset import ExampleT, LazyDataset, TargetT
 
 
diff --git a/src/research/dataset/improvement/zoom.py b/src/research/dataset/improvement/zoom.py
index 8e73d74..fee3fb2 100644
--- a/src/research/dataset/improvement/zoom.py
+++ b/src/research/dataset/improvement/zoom.py
@@ -8,7 +8,6 @@ from polystar.models.image import Image
 from polystar.target_pipeline.objects_filters.in_box_filter import InBoxObjectFilter
 from polystar.view.plt_results_viewer import PltResultViewer
 from research.common.datasets.roco.roco_annotation import ROCOAnnotation
-from research.common.datasets.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo
 
 
 def crop_image_annotation(
@@ -120,6 +119,8 @@ class Zoomer:
 
 
 if __name__ == "__main__":
+    from research.common.datasets.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo
+
     _zoomer = Zoomer(854, 480, 0.15, 0.5)
 
     for _img, _annot, _name in islice(ROCODatasetsZoo.DJI.NORTH_CHINA.lazy(), 0, 3):
diff --git a/src/research/dataset/tensorflow_record.py b/src/research/dataset/tensorflow_record.py
index 2a092ea..f1948b8 100644
--- a/src/research/dataset/tensorflow_record.py
+++ b/src/research/dataset/tensorflow_record.py
@@ -20,19 +20,21 @@ from research.common.datasets.shuffle_dataset import ShuffleDataset
 from research.common.datasets.union_dataset import UnionDataset
 
 
-class TensorflowRecordFactory:
+class ROCOTensorflowRecordFactory:
+    RECORDS_DIR: Path = TENSORFLOW_RECORDS_DIR / "roco"
+
     def __init__(self, objects_filter: ObjectsFilterABC = PassThroughFilter(), n_images_per_file: int = 200):
         self.n_images_per_file = n_images_per_file
         self.objects_filter = objects_filter
 
-    def from_builders(self, builders: List[ROCODatasetBuilder], prefix: str = ""):
+    def from_builders(self, builders: Iterable[ROCODatasetBuilder], prefix: str = ""):
         dataset = UnionDataset(d.build() for d in builders)
         dataset.name = f"{prefix}_{dataset.name}_{len(dataset)}_imgs"
 
         self.from_dataset(dataset)
 
     def from_dataset(self, dataset: Dataset[Path, ROCOAnnotation]):
-        records_dir = make_path(TENSORFLOW_RECORDS_DIR / dataset.name)
+        records_dir = make_path(self.RECORDS_DIR / dataset.name)
         chunks = list(chunk(ShuffleDataset(dataset), self.n_images_per_file))
 
         for chunk_number, dataset_chunk in enumerate(chunks):
diff --git a/src/research/roco_detection/__init__.py b/src/research/roco_detection/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/research/roco_detection/robots_dataset.py b/src/research/roco_detection/robots_dataset.py
new file mode 100644
index 0000000..d80955c
--- /dev/null
+++ b/src/research/roco_detection/robots_dataset.py
@@ -0,0 +1,37 @@
+from polystar.filters.filter_abc import FilterABC
+from polystar.target_pipeline.objects_filters.in_box_filter import InBoxObjectFilter
+from polystar.target_pipeline.objects_filters.type_object_filter import ARMORS_FILTER
+from polystar.view.plt_results_viewer import PltResultViewer
+from research.common.datasets.roco.roco_annotation import ROCOAnnotation
+from research.common.datasets.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo
+from research.roco_detection.small_base_filter import SMALL_BASE_FILTER
+
+
+def clear_small_bases(annotation: ROCOAnnotation):
+    small_bases, annotation.objects = SMALL_BASE_FILTER.split(annotation.objects)
+
+    if not small_bases:
+        return
+
+    armors, robots = ARMORS_FILTER.split(annotation.objects)
+    for base in small_bases:
+        armors = (-InBoxObjectFilter(base.box, 0.5)).filter(armors)
+    annotation.objects = robots + armors
+
+
+class AnnotationHasObjectsFilter(FilterABC[ROCOAnnotation]):
+    def validate_single(self, annotation: ROCOAnnotation) -> bool:
+        return bool(annotation.objects)
+
+
+if __name__ == "__main__":
+    for _img, _annotation, _name in (
+        (ROCODatasetsZoo.TWITCH.T470149066 | ROCODatasetsZoo.TWITCH.T470149568)
+        .shuffle()
+        .cap(10)
+        .to_air()
+        .filter_targets(AnnotationHasObjectsFilter())
+        .cap(30)
+    ):
+        with PltResultViewer(_name) as _viewer:
+            _viewer.display_image_with_objects(_img, _annotation.objects)
diff --git a/src/research/roco_detection/scripts/create_air_datasets.py b/src/research/roco_detection/scripts/create_air_datasets.py
new file mode 100644
index 0000000..d6a3d6a
--- /dev/null
+++ b/src/research/roco_detection/scripts/create_air_datasets.py
@@ -0,0 +1,7 @@
+from polystar.view.plt_results_viewer import PltResultViewer
+from research.common.datasets.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo
+
+if __name__ == "__main__":
+    for _img, _annotation, _name in ROCODatasetsZoo.TWITCH.T470149066.to_air().to_images().shuffle().cap(10):
+        with PltResultViewer(_name) as _v:
+            _v.display_image_with_objects(_img, _annotation.objects)
diff --git a/src/research/roco_detection/scripts/create_air_tensorflow_records.py b/src/research/roco_detection/scripts/create_air_tensorflow_records.py
new file mode 100644
index 0000000..94ff1bc
--- /dev/null
+++ b/src/research/roco_detection/scripts/create_air_tensorflow_records.py
@@ -0,0 +1,22 @@
+from typing import List
+
+from research.common.constants import TENSORFLOW_RECORDS_DIR
+from research.common.datasets.roco.roco_dataset_builder import ROCODatasetBuilder
+from research.common.datasets.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo
+from research.dataset.tensorflow_record import ROCOTensorflowRecordFactory
+from research.roco_detection.robots_dataset import AnnotationHasObjectsFilter
+
+
+class AirTensorflowRecordFactory(ROCOTensorflowRecordFactory):
+    RECORDS_DIR = TENSORFLOW_RECORDS_DIR / "air"
+
+    def from_builders(self, builders: List[ROCODatasetBuilder], prefix: str = ""):
+        super().from_builders((b.to_air().filter_targets(AnnotationHasObjectsFilter()) for b in builders), prefix)
+
+
+if __name__ == "__main__":
+    _factory = AirTensorflowRecordFactory()
+    _factory.from_builders(ROCODatasetsZoo.TWITCH_TRAIN_DATASETS, "Twitch2_Train")
+    _factory.from_builders(ROCODatasetsZoo.TWITCH_VALIDATION_DATASETS, "Twitch2_Val")
+    _factory.from_builders(ROCODatasetsZoo.TWITCH_TEST_DATASETS, "Twitch2_Test")
+    _factory.from_builders(ROCODatasetsZoo.DJI, "Dji")
diff --git a/src/research/roco_detection/small_base_filter.py b/src/research/roco_detection/small_base_filter.py
new file mode 100644
index 0000000..b170706
--- /dev/null
+++ b/src/research/roco_detection/small_base_filter.py
@@ -0,0 +1,6 @@
+from polystar.models.roco_object import ObjectType
+from polystar.target_pipeline.objects_filters.objects_filter_abc import ObjectsFilterABC
+from polystar.target_pipeline.objects_filters.size_filter import SmallObjectFilter
+from polystar.target_pipeline.objects_filters.type_object_filter import TypeObjectsFilter
+
+SMALL_BASE_FILTER: ObjectsFilterABC = -TypeObjectsFilter({ObjectType.BASE}) | SmallObjectFilter(12_500)
-- 
GitLab