diff --git a/dataset/twitch/.gitignore b/dataset/twitch/.gitignore
index d1462f9c9f1d0a25d6d03aabeb9dd8b1b2b2391c..78791b52d04dc1c8758a53f19ace0acccef1b8f2 100644
--- a/dataset/twitch/.gitignore
+++ b/dataset/twitch/.gitignore
@@ -1,4 +1,7 @@
 /robots-views-annotations
 /reviewed-robots-views-annotations
 /final-robots-views
-/aerial-annotations
\ No newline at end of file
+/aerial-annotations
+/save
+/chunks-to-annotate
+/chunks-annotations
\ No newline at end of file
diff --git a/src/polystar/filters/filter_abc.py b/src/polystar/filters/filter_abc.py
index 435b1836979021bd096e81d3062400faa97932cb..794eee74a685d520cde95109137091e1048c61d0 100644
--- a/src/polystar/filters/filter_abc.py
+++ b/src/polystar/filters/filter_abc.py
@@ -13,7 +13,7 @@ class FilterABC(Generic[T], ABC):
 
     def split(self, examples: List[T]) -> Tuple[List[T], List[T]]:
         splits = self.split_with_siblings(examples)
-        return (splits[False][0], splits[True][0])
+        return splits[False][0], splits[True][0]
 
     def split_with_siblings(
         self, examples: List[T], *siblings: List
@@ -43,6 +43,9 @@ class FilterABC(Generic[T], ABC):
     def __and__(self, other: "FilterABC") -> "FilterABC[T]":
         return IntersectionFilter(self, other)
 
+    def __neg__(self) -> "FilterABC[T]":
+        return NegationFilter(self)  # lets callers write `-some_filter` to get the logical negation of a filter
+
 
 class IntersectionFilter(FilterABC[T]):
     def __init__(self, *filters: FilterABC[T]):
@@ -55,7 +58,6 @@ class IntersectionFilter(FilterABC[T]):
 
 class UnionFilter(FilterABC[T]):
     def __init__(self, *filters: FilterABC[T]):
-        print(self, filters)
         self.filters = filters
         assert self.filters
 
@@ -63,6 +65,14 @@ class UnionFilter(FilterABC[T]):
         return any(f.validate_single(example) for f in self.filters)
 
 
+class NegationFilter(FilterABC[T]):  # logical NOT of another filter
+    def __init__(self, base_filter: FilterABC[T]):
+        self.base_filter = base_filter  # the filter whose verdict is inverted
+
+    def validate_single(self, example: T) -> bool:
+        return not self.base_filter.validate_single(example)  # valid exactly when the wrapped filter rejects it
+
+
 def _filter_with_siblings_from_preds(
     are_valid: List[bool], examples: List[T], *siblings: List, expected_value: bool = True
 ) -> Tuple[List[T], ...]:
diff --git a/src/polystar/models/image.py b/src/polystar/models/image.py
index 76ecbd4894d6a38566e8ae0cd66c91c33ff45d6f..0477a4c85d268b713dc08433a1a8904bb14b0c68 100644
--- a/src/polystar/models/image.py
+++ b/src/polystar/models/image.py
@@ -45,8 +45,12 @@ def load_images_in_directory(
 
 
 def save_image(image: Image, image_path: Path, conversion: int = cv2.COLOR_RGB2BGR):
-    image_path.parent.mkdir(exist_ok=True, parents=True)
-    cv2.imwrite(str(image_path), cv2.cvtColor(image, conversion))
+    try:
+        image_path.parent.mkdir(exist_ok=True, parents=True)  # make sure the target directory exists
+        cv2.imwrite(str(image_path), cv2.cvtColor(image, conversion))
+    except Exception:  # not a bare except: KeyboardInterrupt/SystemExit propagate without being reported as a save failure
+        print(f"Failed to save image {image_path}")
+        raise  # the error is only reported here, never swallowed
 
 
 def file_images_to_images(file_images: Iterable[FileImage]) -> List[Image]:
diff --git a/src/polystar/models/roco_image_annotation.py b/src/polystar/models/roco_image_annotation.py
deleted file mode 100644
index 6c3b272b01eab459f8cffe7de312aab7ebe79e13..0000000000000000000000000000000000000000
--- a/src/polystar/models/roco_image_annotation.py
+++ /dev/null
@@ -1,83 +0,0 @@
-import logging
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import List
-from xml.dom.minidom import parseString
-
-import xmltodict
-from dicttoxml import dicttoxml
-
-from polystar.models.image import Image, load_image, save_image
-from polystar.models.roco_object import ROCOObject, ROCOObjectFactory
-
-
-@dataclass
-class ROCOImageAnnotation:
-
-    image_path: Path
-    xml_path: Path
-
-    width: int
-    height: int
-
-    objects: List[ROCOObject]
-
-    has_rune: bool
-
-    _image: Image = field(repr=False, default=None)
-
-    @property
-    def image(self) -> Image:
-        if self._image is None:
-            self._image = load_image(self.image_path)
-        return self._image
-
-    @staticmethod
-    def from_xml_file(xml_file: Path) -> "ROCOImageAnnotation":
-        try:
-            annotation = xmltodict.parse(xml_file.read_text())["annotation"]
-
-            json_objects = annotation.get("object", []) or []
-            json_objects = json_objects if isinstance(json_objects, list) else [json_objects]
-            roco_json_objects = [obj_json for obj_json in json_objects if not obj_json["name"].startswith("rune")]
-            objects = [ROCOObjectFactory.from_json(obj_json) for obj_json in roco_json_objects]
-
-            return ROCOImageAnnotation(
-                width=int(annotation["size"]["width"]),
-                height=int(annotation["size"]["height"]),
-                objects=objects,
-                image_path=xml_file.parent.parent / "image" / f"{xml_file.stem}.jpg",
-                xml_path=xml_file,
-                has_rune=len(roco_json_objects) != len(json_objects),
-            )
-        except Exception as e:
-            logging.error(f"Error parsing annotation file {xml_file}")
-            logging.exception(e)
-            raise e
-
-    def to_xml(self) -> str:
-        return parseString(
-            dicttoxml(
-                {
-                    "annotation": {
-                        "size": {"width": self.width, "height": self.height},
-                        "object": [ROCOObjectFactory.to_json(obj) for obj in self.objects],
-                    }
-                },
-                attr_type=False,
-                root="annotation",
-                item_func=lambda x: x,
-            )
-            .replace(b"<object><object>", b"<object>")
-            .replace(b"</object></object>", b"</object>")
-        ).toprettyxml()
-
-    def save_to_dir(self, directory: Path, image_name: str):
-        self.image_path = (directory / "image" / image_name).with_suffix(".jpg")
-        self.xml_path = (directory / "image_annotation" / image_name).with_suffix(".xml")
-
-        self.image_path.parent.mkdir(exist_ok=True, parents=True)
-        self.xml_path.parent.mkdir(exist_ok=True, parents=True)
-
-        save_image(self.image, self.image_path)
-        self.xml_path.write_text(self.to_xml())
diff --git a/src/polystar/target_pipeline/objects_filters/size_filter.py b/src/polystar/target_pipeline/objects_filters/size_filter.py
new file mode 100644
index 0000000000000000000000000000000000000000..184a358b8996c4d5904d3a905d25bc19ffa2c98e
--- /dev/null
+++ b/src/polystar/target_pipeline/objects_filters/size_filter.py
@@ -0,0 +1,12 @@
+from dataclasses import dataclass
+
+from polystar.models.roco_object import ROCOObject
+from polystar.target_pipeline.objects_filters.objects_filter_abc import ObjectsFilterABC
+
+
+@dataclass
+class SmallObjectFilter(ObjectsFilterABC):
+    min_size: int  # threshold compared against obj.box.area, i.e. an area rather than a side length
+
+    def validate_single(self, obj: ROCOObject) -> bool:
+        return obj.box.area >= self.min_size  # valid when the box area reaches the threshold (inclusive)
diff --git a/src/polystar/target_pipeline/objects_filters/type_object_filter.py b/src/polystar/target_pipeline/objects_filters/type_object_filter.py
index b8d28e47c4bf21fc58f2eb1730a204ad8328523d..3fe846e481466fe496318ca464b2fe9fa9f7e524 100644
--- a/src/polystar/target_pipeline/objects_filters/type_object_filter.py
+++ b/src/polystar/target_pipeline/objects_filters/type_object_filter.py
@@ -12,3 +12,6 @@ class TypeObjectsFilter(ObjectsFilterABC):
 
     def validate_single(self, obj: ROCOObject) -> bool:
         return obj.type in self.desired_types
+
+
+ARMORS_FILTER: ObjectsFilterABC = -TypeObjectsFilter({ObjectType.ARMOR})  # NOTE: unary minus negates the filter, so despite its name this validates objects whose type is NOT ARMOR
diff --git a/src/polystar/utils/iterable_utils.py b/src/polystar/utils/iterable_utils.py
index 3467c9fe02d7490afbdf62d2559dbfb13ec91a57..dd3f30213440e466f69d9f3f21ad085423c1bf50 100644
--- a/src/polystar/utils/iterable_utils.py
+++ b/src/polystar/utils/iterable_utils.py
@@ -1,5 +1,6 @@
 from collections import defaultdict
 from itertools import chain
+from random import shuffle
 from typing import Any, Callable, Dict, Iterable, List, TypeVar
 
 from more_itertools import ilen
@@ -43,3 +44,9 @@ def chunk(it: Iterable[T], batch_size: float) -> Iterable[List[T]]:
 def apply(f: Callable[[T], Any], it: Iterable[T]):
     for el in it:
         f(el)
+
+
+def shuffle_iterable(it: Iterable[T]) -> List[T]:
+    rv = list(it)  # materialise into a new list so the caller's iterable is never reordered
+    shuffle(rv)  # random.shuffle reorders in place and returns None
+    return rv
diff --git a/src/polystar/utils/path.py b/src/polystar/utils/path.py
index 72e8a997d5d2a10d07dd94ddefa7949be1d8b1c5..5f72847782df3f4c9b192bfef0e8713e40c3f5d2 100644
--- a/src/polystar/utils/path.py
+++ b/src/polystar/utils/path.py
@@ -1,4 +1,4 @@
-from os import remove
+from os import PathLike, remove
 from pathlib import Path
 from shutil import copy, make_archive, move
 from typing import Iterable
@@ -24,3 +24,9 @@ def copy_file(source: Path, destination_directory: Path) -> Path:
 
 def archive_directory(directory:Path):
     make_archive(str(directory), "zip", str(directory))
+
+
+def make_path(p: PathLike) -> Path:
+    p = Path(p)
+    p.mkdir(exist_ok=True, parents=True)  # side effect: p is created as a directory (with its parents) when missing
+    return p
diff --git a/src/polystar/utils/thread.py b/src/polystar/utils/thread.py
index 4cf3b45289aefe74e56bdf3bfff77cc1d7e3da6b..bd887946c5e6a66d7f3b96437b3208e15462c7df 100644
--- a/src/polystar/utils/thread.py
+++ b/src/polystar/utils/thread.py
@@ -1,6 +1,8 @@
 from threading import Thread
 from typing import List
 
+from polystar.utils.iterable_utils import apply
+
 
 class MyThread(Thread):
     THREADS: List["MyThread"] = []
@@ -27,5 +29,8 @@ class MyThread(Thread):
 
     @staticmethod
     def close():
-        for thread in MyThread.THREADS:
-            thread.stop()
+        apply(lambda thread: thread.stop(), MyThread.THREADS)  # bound call keeps subclass stop() overrides, as the removed loop did
+
+    @staticmethod
+    def join_all():
+        apply(Thread.join, MyThread.THREADS)  # blocks until every thread in MyThread.THREADS has terminated (join without timeout)
diff --git a/src/polystar/view/results_viewer_abc.py b/src/polystar/view/results_viewer_abc.py
index ea78e95c28b5ca6b54bc4df2da3ca9b1e55f78e3..7cfb04a616afe9d6b84705e12bf0860c35a5f7d9 100644
--- a/src/polystar/view/results_viewer_abc.py
+++ b/src/polystar/view/results_viewer_abc.py
@@ -3,7 +3,6 @@ from itertools import cycle
 from typing import Iterable, Sequence, Tuple
 
 from polystar.models.image import Image
-from polystar.models.roco_image_annotation import ROCOImageAnnotation
 from polystar.models.roco_object import ROCOObject
 from polystar.target_pipeline.debug_pipeline import DebugInfo
 from polystar.target_pipeline.detected_objects.detected_robot import DetectedRobot, FakeDetectedRobot
@@ -55,9 +54,6 @@ class ResultViewerABC(ABC):
         self.add_objects(objects)
         self.display()
 
-    def display_image_annotation(self, annotation: ROCOImageAnnotation):
-        self.display_image_with_objects(annotation.image, annotation.objects)
-
     def display_debug_info(self, debug_info: DebugInfo):
         self.add_debug_info(debug_info)
         self.display()
diff --git a/src/research/armors/dataset/armor_dataset_factory.py b/src/research/armors/dataset/armor_dataset_factory.py
deleted file mode 100644
index 94927f066a96bdda32a09d8d3a33ac7313b7e11f..0000000000000000000000000000000000000000
--- a/src/research/armors/dataset/armor_dataset_factory.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from itertools import islice
-from typing import Iterator, Tuple
-
-import matplotlib.pyplot as plt
-
-from polystar.models.image import Image
-from polystar.models.roco_object import Armor
-from research.common.datasets.lazy_dataset import LazyDataset
-from research.common.datasets.roco.roco_annotation import ROCOAnnotation
-from research.common.datasets.roco.roco_dataset import LazyROCODataset
-from research.common.datasets.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo
-
-
-class ArmorDataset(LazyDataset[Image, Armor]):
-    def __init__(self, dataset: LazyROCODataset):
-        super().__init__(f"{dataset.name}_armors")
-        self.roco_dataset = dataset
-
-    def __iter__(self) -> Iterator[Tuple[Image, Armor, str]]:
-        for image, annotation, name in self.roco_dataset:
-            yield from self._generate_from_single(image, annotation, name)
-
-    @staticmethod
-    def _generate_from_single(image: Image, annotation: ROCOAnnotation, name) -> Iterator[Tuple[Image, Armor, str]]:
-        for i, obj in enumerate(annotation.armors):
-            croped_img = image[obj.box.y1 : obj.box.y2, obj.box.x1 : obj.box.x2]
-            yield croped_img, obj, f"{name}-{i}"
-
-
-if __name__ == "__main__":
-    for _armor_img, _armor, _name in islice(ArmorDataset(ROCODatasetsZoo.DJI.CENTRAL_CHINA.to_images()), 20, 30):
-        print(_name, repr(_armor))
-        plt.imshow(_armor_img)
-        plt.show()
-        plt.clf()
diff --git a/src/research/armors/dataset/armor_value_dataset_cache.py b/src/research/armors/dataset/armor_value_dataset_cache.py
index 51b0e92e470b74753022b93aca4a09a8033cb1a0..d03352f7c434ab7ba610d5dcb226fa1045d1835f 100644
--- a/src/research/armors/dataset/armor_value_dataset_cache.py
+++ b/src/research/armors/dataset/armor_value_dataset_cache.py
@@ -1,71 +1,33 @@
 import json
 from pathlib import Path
 from shutil import rmtree
-from typing import ClassVar, Generic, Optional
-
-from google.cloud.exceptions import Forbidden
+from typing import ClassVar, Generic, Optional, TypeVar
 
 from polystar.models.image import Image, save_image
-from polystar.utils.misc import identity
 from polystar.utils.time import create_time_id
 from polystar.utils.tqdm import smart_tqdm
-from research.armors.dataset.armor_dataset_factory import ArmorDataset
-from research.armors.dataset.armor_value_target_factory import ArmorValueTargetFactory
-from research.common.datasets.lazy_dataset import LazyDataset, TargetT
-from research.common.datasets.roco.roco_dataset_builder import ROCODatasetBuilder
-from research.common.datasets.transform_dataset import TransformDataset
-from research.common.gcloud.gcloud_storage import GCStorages
+from research.common.datasets.lazy_dataset import LazyDataset
+
+T = TypeVar("T")
 
 
-class ArmorValueDatasetCache(Generic[TargetT]):
+# TODO: add AWS support here
+class DatasetCache(Generic[T]):
     VERSION: ClassVar[str] = "2.0"
 
-    def __init__(
-        self,
-        roco_dataset_builder: ROCODatasetBuilder,
-        cache_dir: Path,
-        dataset_name: str,
-        target_factory: ArmorValueTargetFactory[TargetT],
-    ):
-        self.target_factory = target_factory
-        self.dataset_name = dataset_name
+    def __init__(self, cache_dir: Path, dataset: LazyDataset[Image, T]):
+        self.dataset = dataset
         self.cache_dir = cache_dir
-        self.roco_dataset_builder = roco_dataset_builder
         self.lock_file = cache_dir / ".lock"
 
-        self.cache_dir.mkdir(parents=True, exist_ok=True)
+        self.cache_dir.mkdir(exist_ok=True, parents=True)
 
-    def generate_or_download_if_needed(self):
+    def generate_if_missing(self):
         cause = self._get_generation_cause()
         if cause is None:
             return
         self._clean_cache_dir()
-        try:
-            GCStorages.DEV.download_directory(self.cache_dir)
-            cause = self._get_generation_cause()
-            if cause is None:
-                return
-            self._clean_cache_dir()
-        except FileNotFoundError:
-            cause += " and not on gcloud"
-        except Forbidden:
-            pass
-        self.save(self._generate(), cause)
-
-    def _clean_cache_dir(self):
-        rmtree(self.cache_dir, ignore_errors=True)
-        self.cache_dir.mkdir()
-
-    def save(self, dataset: LazyDataset[Image, TargetT], cause: str):
-        desc = f"Generating dataset {self.dataset_name} (cause: {cause})"
-        for img, target, name in smart_tqdm(dataset, desc=desc, unit="img"):
-            save_image(img, self.cache_dir / f"{name}-{str(target)}.jpg")
-        self.lock_file.write_text(json.dumps({"version": self.VERSION, "date": create_time_id()}))
-
-    def _generate(self) -> LazyDataset[Image, TargetT]:
-        return TransformDataset(
-            ArmorDataset(self.roco_dataset_builder.to_images().build_lazy()), identity, self.target_factory.from_armor
-        )
+        self._generate(cause)
 
     def _get_generation_cause(self) -> Optional[str]:
         if not self.lock_file.exists():
@@ -73,3 +35,17 @@ class ArmorValueDatasetCache(Generic[TargetT]):
         version = json.loads(self.lock_file.read_text())["version"]
         if version != self.VERSION:
             return f"upgrade [{version} -> {self.VERSION}]"
+
+    def _clean_cache_dir(self):
+        rmtree(self.cache_dir, ignore_errors=True)  # ignore_errors: a failed delete is not reported here
+        self.cache_dir.mkdir()  # no exist_ok: raises FileExistsError if the directory survived the rmtree above
+
+    def _generate(self, cause: str):
+        desc = f"Generating dataset {self.dataset.name} (cause: {cause})"  # progress-bar label
+        for img, target, name in smart_tqdm(self.dataset, desc=desc, unit="img"):
+            self._save_one(img, target, name)  # per-item persistence hook
+
+        self.lock_file.write_text(json.dumps({"version": self.VERSION, "date": create_time_id()}))  # written only after every item was saved
+
+    def _save_one(self, img: Image, target: T, name: str):
+        save_image(img, self.cache_dir / f"{name}-{str(target)}.jpg")  # default layout: one "<name>-<target>.jpg" directly in cache_dir
diff --git a/src/research/armors/dataset/armor_value_dataset_generator.py b/src/research/armors/dataset/armor_value_dataset_generator.py
index 8f9ed9d186d0319c6904f39fd2759cd76974be6b..24f0d892487432c951b17e81b04a59db2f372de4 100644
--- a/src/research/armors/dataset/armor_value_dataset_generator.py
+++ b/src/research/armors/dataset/armor_value_dataset_generator.py
@@ -5,7 +5,7 @@ from polystar.filters.exclude_filter import ExcludeFilter
 from polystar.filters.filter_abc import FilterABC
 from polystar.filters.pass_through_filter import PassThroughFilter
 from polystar.models.image import FileImage
-from research.armors.dataset.armor_value_dataset_cache import ArmorValueDatasetCache
+from research.armors.dataset.armor_value_dataset_cache import DatasetCache
 from research.armors.dataset.armor_value_target_factory import ArmorValueTargetFactory
 from research.common.datasets.dataset import Dataset
 from research.common.datasets.image_file_dataset_builder import DirectoryDatasetBuilder
@@ -58,14 +58,13 @@ class ArmorValueDatasetGenerator(Generic[TargetT]):
 
     def from_roco_dataset(self, roco_dataset_builder: ROCODatasetBuilder) -> DirectoryDatasetBuilder[TargetT]:
         cache_dir = roco_dataset_builder.main_dir / self.task_name
-        dataset_name = roco_dataset_builder.name
 
-        ArmorValueDatasetCache(
-            roco_dataset_builder, cache_dir, dataset_name, self.target_factory
-        ).generate_or_download_if_needed()
+        DatasetCache(
+            cache_dir, roco_dataset_builder.to_armors().transform_targets(self.target_factory.from_armor).build_lazy()
+        ).generate_if_missing()
 
         return (
-            DirectoryDatasetBuilder(cache_dir, self.target_factory.from_file, dataset_name)
+            DirectoryDatasetBuilder(cache_dir, self.target_factory.from_file, roco_dataset_builder.name)
             .filter_targets(self.targets_filter)
             .filter_examples(ExcludeFilesFilter(DatasetChanges(cache_dir).invalidated))
         )
diff --git a/src/research/common/datasets/dataset_builder.py b/src/research/common/datasets/dataset_builder.py
index 68b595a61a424c3ece0d6adae108100f3143b19b..90184301185c7170234fa054add191a013c38930 100644
--- a/src/research/common/datasets/dataset_builder.py
+++ b/src/research/common/datasets/dataset_builder.py
@@ -6,9 +6,11 @@ from polystar.utils.misc import identity
 from research.common.datasets.dataset import Dataset
 from research.common.datasets.filter_dataset import ExampleU, FilterDataset, TargetU
 from research.common.datasets.lazy_dataset import ExampleT, LazyDataset, TargetT
+from research.common.datasets.observable_dataset import ObservableDataset
 from research.common.datasets.shuffle_dataset import ShuffleDataset
 from research.common.datasets.slice_dataset import SliceDataset
 from research.common.datasets.transform_dataset import TransformDataset
+from research.common.datasets.union_dataset import UnionLazyDataset
 
 
 class DatasetBuilder(Generic[ExampleT, TargetT], Iterable[Tuple[ExampleT, TargetT, str]]):
@@ -59,6 +61,14 @@ class DatasetBuilder(Generic[ExampleT, TargetT], Iterable[Tuple[ExampleT, Target
         self.dataset = TransformDataset(self.dataset, identity, target_transformer)
         return self
 
+    def apply_to_examples(self, example_observable: Callable[[ExampleT], None]) -> "DatasetBuilder[ExampleT, TargetT]":
+        self.dataset = ObservableDataset(self.dataset, example_observable, identity)  # identity: targets pass through unobserved
+        return self  # fluent: same builder, item types unchanged
+
+    def apply_to_targets(self, target_observable: Callable[[TargetT], None]) -> "DatasetBuilder[ExampleT, TargetT]":
+        self.dataset = ObservableDataset(self.dataset, identity, target_observable)  # identity: examples pass through unobserved
+        return self  # fluent: same builder, item types unchanged
+
     def shuffle(self) -> "DatasetBuilder[ExampleT, TargetU]":
         self.dataset = ShuffleDataset(self.dataset)
         return self
@@ -71,6 +81,10 @@ class DatasetBuilder(Generic[ExampleT, TargetT], Iterable[Tuple[ExampleT, Target
         self.dataset = SliceDataset(self.dataset, start=n)
         return self
 
+    def __or__(self, other: "DatasetBuilder[ExampleT, TargetT]"):
+        self.dataset = UnionLazyDataset((self.dataset, other.dataset))  # NOTE: rebinds self.dataset, so `a | b` modifies `a`
+        return self  # returns the left operand itself, not a new builder
+
     @property
     def name(self) -> str:
         return self.dataset.name
diff --git a/src/research/common/datasets/observable_dataset.py b/src/research/common/datasets/observable_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..4378280fac05513dd8c7dd65b8e7f12bc928a41c
--- /dev/null
+++ b/src/research/common/datasets/observable_dataset.py
@@ -0,0 +1,26 @@
+from typing import Callable, Iterator, Tuple
+
+from research.common.datasets.filter_dataset import ExampleU, TargetU
+from research.common.datasets.lazy_dataset import ExampleT, LazyDataset, TargetT
+
+
+class ObservableDataset(LazyDataset[ExampleT, TargetT]):  # passes items through unchanged while calling observers on them
+    def __init__(
+        self,
+        source: LazyDataset[ExampleT, TargetT],
+        example_observable: Callable[[ExampleT], None],
+        target_observable: Callable[[TargetT], None],
+    ):
+        self.target_observable = target_observable
+        self.example_observable = example_observable
+        self.source = source
+        super().__init__(source.name)  # keeps the name of the wrapped dataset
+
+    def __iter__(self) -> Iterator[Tuple[ExampleT, TargetT, str]]:
+        for example, target, name in self.source:
+            self.example_observable(example)  # called for its side effect; the return value is ignored
+            self.target_observable(target)  # same for the target
+            yield example, target, name  # the item itself is forwarded untouched
+
+    def __len__(self):
+        return len(self.source)
diff --git a/src/research/common/datasets/roco/air_dataset.py b/src/research/common/datasets/roco/air_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..f3ffa6d0e45761b087dc5e3accd0df5116e2fadf
--- /dev/null
+++ b/src/research/common/datasets/roco/air_dataset.py
@@ -0,0 +1,37 @@
+from typing import Iterator, Tuple
+
+from polystar.models.image import Image
+from polystar.target_pipeline.objects_filters.objects_filter_abc import ObjectsFilterABC
+from polystar.target_pipeline.objects_filters.type_object_filter import ARMORS_FILTER
+from research.armors.dataset.armor_value_dataset_cache import DatasetCache
+from research.common.datasets.lazy_dataset import LazyDataset
+from research.common.datasets.roco.roco_annotation import ROCOAnnotation
+from research.common.datasets.roco.roco_dataset import LazyROCODataset
+from research.dataset.improvement.zoom import crop_image_annotation
+
+
+class AIRDataset(LazyDataset[Image, ROCOAnnotation]):
+    """Armors In Robots"""
+
+    def __init__(self, roco_dataset: LazyROCODataset, robots_filter: ObjectsFilterABC):
+        super().__init__(roco_dataset.name + "_AIR")
+        self.robots_filter = robots_filter
+        self.roco_dataset = roco_dataset
+
+    def __iter__(self) -> Iterator[Tuple[Image, ROCOAnnotation, str]]:
+        for image, annotation, name in self.roco_dataset:
+            yield from self._generate_from_single(image, annotation, name)  # one source image can yield several items
+
+    def _generate_from_single(
+        self, image: Image, annotation: ROCOAnnotation, name: str
+    ) -> Iterator[Tuple[Image, ROCOAnnotation, str]]:
+        annotation.objects, robots = ARMORS_FILTER.split(annotation.objects)  # mutates annotation; presumably objects <- armors, robots <- the rest (confirm split order)
+        for i, robot in enumerate(self.robots_filter.filter(robots)):  # one item per robot returned by robots_filter
+            yield crop_image_annotation(
+                image, annotation, robot.box, min_coverage=0.75, name=f"{name}-{i}-{robot.type.name.lower()}"
+            )
+
+class AIRDatasetCache(DatasetCache[ROCOAnnotation]):
+    def _save_one(self, img: Image, annotation: ROCOAnnotation, name: str):
+        annotation.save_with_image(self.cache_dir, img, name)  # overrides the default layout: the annotation is saved together with its image
diff --git a/src/research/common/datasets/roco/roco_annotation.py b/src/research/common/datasets/roco/roco_annotation.py
index 8c262f0f82a9ffcad64fc123e0436de20e91d9cc..5d4a0a2dc25364bb8bbf51690169674ff4d47d4a 100644
--- a/src/research/common/datasets/roco/roco_annotation.py
+++ b/src/research/common/datasets/roco/roco_annotation.py
@@ -7,7 +7,8 @@ from xml.dom.minidom import parseString
 import xmltodict
 from dicttoxml import dicttoxml
 
-from polystar.models.roco_object import Armor, ROCOObject, ROCOObjectFactory
+from polystar.models.image import Image, save_image
+from polystar.models.roco_object import ROCOObject, ROCOObjectFactory
 from polystar.utils.path import move_file
 
 
@@ -20,10 +21,6 @@ class ROCOAnnotation:
     w: int
     h: int
 
-    @property
-    def armors(self) -> List[Armor]:
-        return [obj for obj in self.objects if isinstance(obj, Armor)]
-
     @staticmethod
     def from_xml_file(xml_file: Path) -> "ROCOAnnotation":
         try:
@@ -43,6 +40,7 @@ class ROCOAnnotation:
         objects = [
             ROCOObjectFactory(image_w=image_w, image_h=image_h).from_json(obj_json) for obj_json in roco_json_objects
         ]
+        objects = [obj for obj in objects if obj.box.w > 0 and obj.box.h > 0]
 
         return ROCOAnnotation(
             objects=objects, has_rune=len(roco_json_objects) != len(json_objects), w=image_w, h=image_h,
@@ -52,6 +50,10 @@ class ROCOAnnotation:
         directory.mkdir(exist_ok=True, parents=True)
         self.save_in_file((directory / name).with_suffix(".xml"))
 
+    def save_with_image(self, directory: Path, image: Image, name: str):
+        self.save_in_directory(directory / "image_annotation", name)  # writes <directory>/image_annotation/<name>.xml
+        save_image(image, (directory / "image" / name).with_suffix(".jpg"))  # writes <directory>/image/<name>.jpg
+
     def save_in_file(self, file: Path):
         file.write_text(self.to_xml())
 
diff --git a/src/research/common/datasets/roco/roco_dataset_builder.py b/src/research/common/datasets/roco/roco_dataset_builder.py
index bb402226b5963c378802d54fbd5eeceb3f234a18..af9eb3c791fdd06805a67d2f2fbf084818c1cfa2 100644
--- a/src/research/common/datasets/roco/roco_dataset_builder.py
+++ b/src/research/common/datasets/roco/roco_dataset_builder.py
@@ -1,14 +1,36 @@
 from pathlib import Path
 
+from polystar.models.image import Image
+from polystar.models.roco_object import Armor, ObjectType, ROCOObject
+from polystar.target_pipeline.objects_filters.type_object_filter import TypeObjectsFilter
+from research.common.constants import DSET_DIR
+from research.common.datasets.dataset_builder import DatasetBuilder
 from research.common.datasets.image_file_dataset_builder import DirectoryDatasetBuilder
+from research.common.datasets.roco.air_dataset import AIRDataset, AIRDatasetCache
 from research.common.datasets.roco.roco_annotation import ROCOAnnotation
+from research.common.datasets.roco.roco_objects_dataset import ROCOObjectsDataset
+from research.roco_detection.small_base_filter import SMALL_BASE_FILTER
 
 
 class ROCODatasetBuilder(DirectoryDatasetBuilder[ROCOAnnotation]):
     def __init__(self, directory: Path, name: str, extension: str = "jpg"):
-        super().__init__(directory / "image", self.roco_annotation_from_image_file, name, extension)
+        super().__init__(directory / "image", self._roco_annotation_from_image_file, name, extension)
         self.annotations_dir = directory / "image_annotation"
         self.main_dir = directory
 
-    def roco_annotation_from_image_file(self, image_file: Path) -> ROCOAnnotation:
+    def to_objects(self) -> DatasetBuilder[Image, ROCOObject]:
+        return DatasetBuilder(ROCOObjectsDataset(self.to_images()))  # new builder over per-object items instead of per-image items
+
+    def to_armors(self) -> DatasetBuilder[Image, Armor]:
+        builder = self.to_objects().filter_targets(TypeObjectsFilter({ObjectType.ARMOR}))  # keep only ARMOR-typed targets
+        builder.name = builder.name.replace("objects", "armors")  # NOTE(review): DatasetBuilder.name is a property; this needs a setter — confirm
+        return builder
+
+    # FIXME: it makes no sense to have a ROCODatasetBuilder as output
+    def to_air(self) -> "ROCODatasetBuilder":
+        cache_dir = DSET_DIR / "air" / self.main_dir.relative_to(DSET_DIR)  # relative_to raises ValueError when main_dir is not under DSET_DIR
+        AIRDatasetCache(cache_dir, AIRDataset(self.to_images(), SMALL_BASE_FILTER)).generate_if_missing()  # builds the cache on disk when missing or outdated
+        return ROCODatasetBuilder(cache_dir, self.name + "_AIR")  # fresh builder reading back from the cache directory
+
+    def _roco_annotation_from_image_file(self, image_file: Path) -> ROCOAnnotation:
         return ROCOAnnotation.from_xml_file(self.annotations_dir / f"{image_file.stem}.xml")
diff --git a/src/research/common/datasets/roco/roco_datasets.py b/src/research/common/datasets/roco/roco_datasets.py
index b7765e94746ea8fadd1a20ada6c71d72c1e0fb2d..dbecdcdec9add9f5939986cdcc3e4d8955f19e1d 100644
--- a/src/research/common/datasets/roco/roco_datasets.py
+++ b/src/research/common/datasets/roco/roco_datasets.py
@@ -7,7 +7,6 @@ from research.common.datasets.roco.roco_dataset_builder import ROCODatasetBuilde
 from research.common.datasets.union_dataset import UnionLazyDataset
 
 
-# FIXME : we should be able to access a builder 2 times
 class ROCODatasetsMeta(type):
     def __init__(cls, name: str, bases, dct):
         super().__init__(name, bases, dct)
diff --git a/src/research/common/datasets/roco/roco_objects_dataset.py b/src/research/common/datasets/roco/roco_objects_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..8cf466da6572b3d6253e507d90ec51cebef6bd40
--- /dev/null
+++ b/src/research/common/datasets/roco/roco_objects_dataset.py
@@ -0,0 +1,25 @@
+from typing import Iterator, Tuple
+
+from polystar.models.image import Image
+from polystar.models.roco_object import Armor, ROCOObject
+from research.common.datasets.lazy_dataset import LazyDataset
+from research.common.datasets.roco.roco_annotation import ROCOAnnotation
+from research.common.datasets.roco.roco_dataset import LazyROCODataset
+
+
+class ROCOObjectsDataset(LazyDataset[Image, ROCOObject]):  # flattens a ROCO dataset into one item per annotated object
+    def __init__(self, dataset: LazyROCODataset):
+        super().__init__(f"{dataset.name}_objects")
+        self.roco_dataset = dataset
+
+    def __iter__(self) -> Iterator[Tuple[Image, ROCOObject, str]]:
+        for image, annotation, name in self.roco_dataset:
+            yield from self._generate_from_single(image, annotation, name)
+
+    @staticmethod
+    def _generate_from_single(
+        image: Image, annotation: ROCOAnnotation, name: str
+    ) -> Iterator[Tuple[Image, ROCOObject, str]]:
+        for i, obj in enumerate(annotation.objects):  # every object of the annotation, whatever its type
+            cropped_img = image[obj.box.y1 : obj.box.y2, obj.box.x1 : obj.box.x2]  # rows y1:y2, columns x1:x2
+            yield cropped_img, obj, f"{name}-{i}"  # the object's index keeps names unique within one image
diff --git a/src/research/common/datasets/roco/zoo/roco_dataset_zoo.py b/src/research/common/datasets/roco/zoo/roco_dataset_zoo.py
index 65a982acf125e98c6726df18f28eb8fb285b7788..57065038fe494c0bb41ae6fbaca7432513ccd854 100644
--- a/src/research/common/datasets/roco/zoo/roco_dataset_zoo.py
+++ b/src/research/common/datasets/roco/zoo/roco_dataset_zoo.py
@@ -1,4 +1,4 @@
-from typing import Iterable, Iterator, Type
+from typing import Iterable, Iterator, List, Type
 
 from research.common.datasets.roco.roco_datasets import ROCODatasets
 from research.common.datasets.roco.zoo.dji import DJIROCODatasets
@@ -6,31 +6,38 @@ from research.common.datasets.roco.zoo.dji_zoomed import DJIROCOZoomedDatasets
 from research.common.datasets.roco.zoo.twitch import TwitchROCODatasets
 
 
-class ROCODatasetsZoo(Iterable[Type[ROCODatasets]]):
+# FIXME: find a better way to do this (builders need to be instantiated once per call)
+# FIXME: improve the singleton pattern here
+class ROCODatasetsZooClass(Iterable[Type[ROCODatasets]]):
     DJI_ZOOMED = DJIROCOZoomedDatasets
     DJI = DJIROCODatasets
     TWITCH = TwitchROCODatasets
 
-    TWITCH_TRAIN_DATASETS = [
-        TWITCH.T470149066,
-        TWITCH.T470150052,
-        TWITCH.T470152289,
-        TWITCH.T470153081,
-        TWITCH.T470158483,
-    ]
-    TWITCH_VALIDATION_DATASETS = [TWITCH.T470152932, TWITCH.T470149568]
-    TWITCH_TEST_DATASETS = [TWITCH.T470152838, TWITCH.T470151286]
-
-    DJI_TRAIN_DATASETS = [DJI.FINAL, DJI.CENTRAL_CHINA, DJI.NORTH_CHINA, DJI.SOUTH_CHINA]
-    DJI_ZOOMED_TRAIN_DATASETS = [
-        DJI_ZOOMED.FINAL,
-        DJI_ZOOMED.CENTRAL_CHINA,
-        DJI_ZOOMED.NORTH_CHINA,
-        DJI_ZOOMED.SOUTH_CHINA,
-    ]
-
-    TWITCH_DJI_TRAIN_DATASETS = TWITCH_TRAIN_DATASETS + DJI_TRAIN_DATASETS
-    TWITCH_DJI_ZOOMED_TRAIN_DATASETS = TWITCH_TRAIN_DATASETS + DJI_TRAIN_DATASETS
+    @property
+    def TWITCH_TRAIN_DATASETS(self) -> List[ROCODatasets]:
+        return [
+            self.TWITCH.T470149066,
+            self.TWITCH.T470150052,
+            self.TWITCH.T470152289,
+            self.TWITCH.T470153081,
+            self.TWITCH.T470158483,
+        ]
+
+    @property
+    def TWITCH_VALIDATION_DATASETS(self) -> List[ROCODatasets]:
+        return [self.TWITCH.T470152932, self.TWITCH.T470149568]
+
+    @property
+    def TWITCH_TEST_DATASETS(self) -> List[ROCODatasets]:
+        return [self.TWITCH.T470152838, self.TWITCH.T470151286]
+
+    @property
+    def TWITCH_DJI_TRAIN_DATASETS(self) -> List[ROCODatasets]:
+        return self.TWITCH_TRAIN_DATASETS + list(self.DJI)
+
+    @property
+    def TWITCH_DJI_ZOOMED_TRAIN_DATASETS(self) -> List[ROCODatasets]:
+        return self.TWITCH_TRAIN_DATASETS + list(self.DJI_ZOOMED)
 
     DEFAULT_TEST_DATASETS = TWITCH_TEST_DATASETS
     DEFAULT_VALIDATION_DATASETS = TWITCH_VALIDATION_DATASETS
@@ -40,4 +47,9 @@ class ROCODatasetsZoo(Iterable[Type[ROCODatasets]]):
         return iter((self.DJI, self.DJI_ZOOMED, self.TWITCH))
 
 
-ROCODatasetsZoo = ROCODatasetsZoo()
+ROCODatasetsZoo = ROCODatasetsZooClass()
+
+
+if __name__ == "__main__":
+    ROCODatasetsZoo.DEFAULT_TEST_DATASETS[0].build_lazy()
+    ROCODatasetsZoo.DEFAULT_TEST_DATASETS[0].build_lazy()
diff --git a/src/research/common/datasets/shuffle_dataset.py b/src/research/common/datasets/shuffle_dataset.py
index ad28020701a07907d64a2ebc48a74dfa73c1ccf9..3b45466ba148cc3dda6368ba7f451b0173aa8ec3 100644
--- a/src/research/common/datasets/shuffle_dataset.py
+++ b/src/research/common/datasets/shuffle_dataset.py
@@ -1,6 +1,6 @@
-from random import shuffle
 from typing import Iterator, Tuple
 
+from polystar.utils.iterable_utils import shuffle_iterable
 from research.common.datasets.lazy_dataset import ExampleT, LazyDataset, TargetT
 
 
@@ -10,6 +10,4 @@ class ShuffleDataset(LazyDataset):
         self.source = source
 
     def __iter__(self) -> Iterator[Tuple[ExampleT, TargetT]]:
-        data = list(self.source)
-        shuffle(data)
-        return iter(data)
+        return iter(shuffle_iterable(self.source))
diff --git a/src/research/common/datasets/transform_dataset.py b/src/research/common/datasets/transform_dataset.py
index 54251be04c332a19a306ea93cc3e5c71c169638d..eed420e5391b246bbe596bcc0edef176acccbaf8 100644
--- a/src/research/common/datasets/transform_dataset.py
+++ b/src/research/common/datasets/transform_dataset.py
@@ -1,6 +1,6 @@
 from typing import Callable, Iterator, Tuple
 
-from research.common.datasets.dataset_builder import ExampleU, TargetU
+from research.common.datasets.filter_dataset import ExampleU, TargetU
 from research.common.datasets.lazy_dataset import ExampleT, LazyDataset, TargetT
 
 
diff --git a/src/research/common/datasets/union_dataset.py b/src/research/common/datasets/union_dataset.py
index 4a381a6f59377db560829e8e17d03b3aa1231604..1de4989e5b3e4fb566800f402448977db9c5aec3 100644
--- a/src/research/common/datasets/union_dataset.py
+++ b/src/research/common/datasets/union_dataset.py
@@ -7,7 +7,7 @@ from research.common.datasets.lazy_dataset import ExampleT, LazyDataset, TargetT
 class UnionLazyDataset(LazyDataset[ExampleT, TargetT]):
     def __init__(self, datasets: Iterable[LazyDataset[ExampleT, TargetT]], name: str = None):
         self.datasets = list(datasets)
-        super().__init__(name or _name_from_union(self.datasets))
+        super().__init__(name or name_from_union(self.datasets))
 
     def __iter__(self) -> Iterator[Tuple[ExampleT, TargetT, str]]:
         for dataset in self.datasets:
@@ -24,9 +24,9 @@ class UnionDataset(Dataset[ExampleT, TargetT]):
             sum((d.examples for d in self.datasets), []),
             sum((d.targets for d in self.datasets), []),
             sum((d.names for d in self.datasets), []),
-            name or _name_from_union(self.datasets),
+            name or name_from_union(self.datasets),
         )
 
 
-def _name_from_union(datasets: List[LazyDataset]):
+def name_from_union(datasets: List[LazyDataset]):
     return "_".join(d.name for d in datasets)
diff --git a/src/research/dataset/improvement/zoom.py b/src/research/dataset/improvement/zoom.py
index 8e73d74c64f871c6d46eca6845b7fd8e2ffd38e6..fee3fb25a730fbd04dfbf1a87a8c364291fc28c6 100644
--- a/src/research/dataset/improvement/zoom.py
+++ b/src/research/dataset/improvement/zoom.py
@@ -8,7 +8,6 @@ from polystar.models.image import Image
 from polystar.target_pipeline.objects_filters.in_box_filter import InBoxObjectFilter
 from polystar.view.plt_results_viewer import PltResultViewer
 from research.common.datasets.roco.roco_annotation import ROCOAnnotation
-from research.common.datasets.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo
 
 
 def crop_image_annotation(
@@ -120,6 +119,8 @@ class Zoomer:
 
 
 if __name__ == "__main__":
+    from research.common.datasets.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo
+
     _zoomer = Zoomer(854, 480, 0.15, 0.5)
 
     for _img, _annot, _name in islice(ROCODatasetsZoo.DJI.NORTH_CHINA.lazy(), 0, 3):
diff --git a/src/research/dataset/scripts/create_tensorflow_records.py b/src/research/dataset/scripts/create_tensorflow_records.py
index 21486eb7956ce97906552c061ad545e4200bcc77..49530a35c93d3d8dea8d29f538cc5e713c523cbf 100644
--- a/src/research/dataset/scripts/create_tensorflow_records.py
+++ b/src/research/dataset/scripts/create_tensorflow_records.py
@@ -3,17 +3,8 @@ from research.dataset.tensorflow_record import TensorflowRecordFactory
 
 if __name__ == "__main__":
     _factory = TensorflowRecordFactory()
-    # _factory.from_datasets(ROCODatasetsZoo.TWITCH_TRAIN_DATASETS, "Twitch2_Train_")
-    # _factory.from_datasets(ROCODatasetsZoo.TWITCH_VALIDATION_DATASETS, "Twitch2_Val_")
-    # _factory.from_datasets(ROCODatasetsZoo.TWITCH_TEST_DATASETS, "Twitch2_Test_")
-    _factory.from_datasets(ROCODatasetsZoo.TWITCH_DJI_TRAIN_DATASETS, "Twitch2_Dji_Train_")
-    # _factory.from_datasets(ROCODatasetsZoo.TWITCH_DJI_ZOOMED_TRAIN_DATASETS, "Twitch2_DjiZoomed2_Train_")
-
-    # TensorflowRecordFactory.from_datasets(ROCODatasetsZoo.DJI_TRAIN_DATASETS, "DJI_Train_")
-    # TensorflowRecordFactory.from_datasets(ROCODatasetsZoo.DJI_ZOOMED_TRAIN_DATASETS, "DJIZoomedV2_Train_")
-    # TensorflowRecordFactory.from_datasets(
-    #     ROCODatasetsZoo.TWITCH_TRAIN_DATASETS + ROCODatasetsZoo.DJI_TRAIN_DATASETS, "Twitch_DJI_Train_"
-    # )
-    # TensorflowRecordFactory.from_datasets(
-    #     ROCODatasetsZoo.TWITCH_TRAIN_DATASETS + ROCODatasetsZoo.DJI_ZOOMED_TRAIN_DATASETS, "Twitch_DJIZoomedV2_Train_"
-    # )
+    _factory.from_builders(ROCODatasetsZoo.TWITCH_TRAIN_DATASETS, "Twitch2_Train")
+    _factory.from_builders(ROCODatasetsZoo.TWITCH_VALIDATION_DATASETS, "Twitch2_Val")
+    _factory.from_builders(ROCODatasetsZoo.TWITCH_TEST_DATASETS, "Twitch2_Test")
+    _factory.from_builders(ROCODatasetsZoo.DJI, "Dji")
+    _factory.from_builders(ROCODatasetsZoo.DJI_ZOOMED, "DjiZoomed")
diff --git a/src/research/dataset/tensorflow_record.py b/src/research/dataset/tensorflow_record.py
index 5c2c80407f2e0f5638fb6b9695a4b33f40ceb1c3..f1948b8985f3667247ac4ae2ac2bef0a647ea5d8 100644
--- a/src/research/dataset/tensorflow_record.py
+++ b/src/research/dataset/tensorflow_record.py
@@ -1,7 +1,7 @@
 import hashlib
 from pathlib import Path
-from shutil import move
-from typing import List
+from threading import Thread
+from typing import Iterable, List
 
 from tensorflow.core.example.example_pb2 import Example
 from tensorflow.core.example.feature_pb2 import BytesList, Feature, Features, FloatList, Int64List
@@ -10,75 +10,114 @@ from tensorflow_core.python.lib.io.tf_record import TFRecordWriter
 from polystar.filters.pass_through_filter import PassThroughFilter
 from polystar.models.label_map import label_map
 from polystar.target_pipeline.objects_filters.objects_filter_abc import ObjectsFilterABC
-from polystar.utils.tqdm import smart_tqdm
+from polystar.utils.iterable_utils import chunk
+from polystar.utils.path import make_path
 from research.common.constants import TENSORFLOW_RECORDS_DIR
+from research.common.datasets.dataset import Dataset
 from research.common.datasets.roco.roco_annotation import ROCOAnnotation
 from research.common.datasets.roco.roco_dataset_builder import ROCODatasetBuilder
+from research.common.datasets.shuffle_dataset import ShuffleDataset
+from research.common.datasets.union_dataset import UnionDataset
 
 
-class TensorflowRecordFactory:
-    def __init__(self, objects_filter: ObjectsFilterABC = PassThroughFilter()):
-        self.objects_filter = objects_filter
+class ROCOTensorflowRecordFactory:
+    """Write ROCO datasets as record files of `n_images_per_file` images, in `RECORDS_DIR / <dataset name>`."""
+    RECORDS_DIR: Path = TENSORFLOW_RECORDS_DIR / "roco"
 
-    def from_dataset(self, dataset: ROCODatasetBuilder, prefix: str = ""):
-        self.from_datasets([dataset], prefix)
-
-    def from_datasets(self, datasets: List[ROCODatasetBuilder], prefix: str = ""):
-        record_name = prefix + "_".join(d.name for d in datasets)
-        writer = TFRecordWriter(str(TENSORFLOW_RECORDS_DIR / f"{record_name}.record"))
-        c = 0
-        for dataset in smart_tqdm(datasets, desc=record_name, unit="dataset"):
-            for image_path, annotation, _ in smart_tqdm(dataset, desc=dataset.name, unit="img", leave=False):
-                writer.write(self._example_from_image_annotation(image_path, annotation).SerializeToString())
-                c += 1
-        writer.close()
-        move(
-            str(TENSORFLOW_RECORDS_DIR / f"{record_name}.record"),
-            str(TENSORFLOW_RECORDS_DIR / f"{record_name}_{c}_imgs.record"),
-        )
+    def __init__(self, objects_filter: ObjectsFilterABC = PassThroughFilter(), n_images_per_file: int = 200):
+        self.n_images_per_file = n_images_per_file
+        self.objects_filter = objects_filter
 
-    def _example_from_image_annotation(self, image_path: Path, annotation: ROCOAnnotation) -> Example:
-        image_name = image_path.name
-        encoded_jpg = image_path.read_bytes()
-        key = hashlib.sha256(encoded_jpg).hexdigest()
-
-        width, height = annotation.w, annotation.h
-
-        xmin, ymin, xmax, ymax, classes, classes_text = [], [], [], [], [], []
-
-        for obj in self.objects_filter.filter(annotation.objects):
-            x1 = max(0.0, obj.box.x1 / width)
-            y1 = max(0.0, obj.box.y1 / height)
-            x2 = min(1.0, obj.box.x2 / width)
-            y2 = min(1.0, obj.box.y2 / height)
-            if x1 >= x2 or y1 >= y2:
-                continue
-            xmin.append(x1)
-            ymin.append(y1)
-            xmax.append(x2)
-            ymax.append(y2)
-            classes_text.append(obj.type.name.lower().encode("utf8"))
-            classes.append(label_map.id_of(obj.type.name.lower()))
-
-        return Example(
-            features=Features(
-                feature={
-                    "image/filename": bytes_feature(image_name.encode("utf8")),
-                    "image/source_id": bytes_feature(image_name.encode("utf8")),
-                    "image/height": int64_feature(height),
-                    "image/width": int64_feature(width),
-                    "image/key/sha256": bytes_feature(key.encode("utf8")),
-                    "image/encoded": bytes_feature(encoded_jpg),
-                    "image/format": bytes_feature("jpeg".encode("utf8")),
-                    "image/object/bbox/xmin": float_list_feature(xmin),
-                    "image/object/bbox/xmax": float_list_feature(xmax),
-                    "image/object/bbox/ymin": float_list_feature(ymin),
-                    "image/object/bbox/ymax": float_list_feature(ymax),
-                    "image/object/class/text": bytes_list_feature(classes_text),
-                    "image/object/class/label": int64_list_feature(classes),
-                }
-            )
+    def from_builders(self, builders: Iterable[ROCODatasetBuilder], prefix: str = ""):
+        """Build every builder, merge the results and write them as `<prefix>_<union name>_<n>_imgs`."""
+        dataset = UnionDataset(d.build() for d in builders)
+        dataset.name = f"{prefix}_{dataset.name}_{len(dataset)}_imgs"
+
+        self.from_dataset(dataset)
+
+    def from_dataset(self, dataset: Dataset[Path, ROCOAnnotation]):
+        """Shuffle the dataset and start one writer thread per chunk of `n_images_per_file` examples.
+
+        The threads are started but not joined here.
+        """
+        records_dir = make_path(self.RECORDS_DIR / dataset.name)
+        chunks = list(chunk(ShuffleDataset(dataset), self.n_images_per_file))
+
+        for chunk_number, dataset_chunk in enumerate(chunks):
+            TFRecordFactoryThread(
+                records_dir / f"images_{chunk_number:05}_of_{len(chunks):05}.record",
+                dataset_chunk,
+                self.objects_filter,  # forwarded so the filter given to the factory is actually applied
+            ).start()
+
+
+class TFRecordFactoryThread(Thread):
+    """Thread writing one chunk of (image_path, annotation, name) examples to a single record file."""
+
+    def __init__(
+        self, record_file: Path, dataset_chunk: Iterable, objects_filter: ObjectsFilterABC = PassThroughFilter()
+    ):
+        super().__init__()
+        self.dataset_chunk = dataset_chunk
+        self.record_file = record_file
+        self.objects_filter = objects_filter
+
+    def run(self) -> None:
+        with TFRecordWriter(str(self.record_file)) as writer:
+            for image_path, annotation, _ in self.dataset_chunk:
+                example = _example_from_image_annotation(image_path, annotation, self.objects_filter)
+                writer.write(example.SerializeToString())
+
+
+def _example_from_image_annotation(
+    image_path: Path, annotation: ROCOAnnotation, objects_filter: ObjectsFilterABC = PassThroughFilter()
+) -> Example:
+    """Build the Example of an annotated image, keeping only the objects returned by `objects_filter`.
+
+    Box coordinates are divided by the annotation width/height and clipped to [0, 1];
+    boxes that end up empty after clipping are skipped.
+    """
+    image_name = image_path.name
+    encoded_jpg = image_path.read_bytes()
+    key = hashlib.sha256(encoded_jpg).hexdigest()
+
+    width, height = annotation.w, annotation.h
+
+    xmin, ymin, xmax, ymax, classes, classes_text = [], [], [], [], [], []
+
+    for obj in objects_filter.filter(annotation.objects):
+        x1 = max(0.0, obj.box.x1 / width)
+        y1 = max(0.0, obj.box.y1 / height)
+        x2 = min(1.0, obj.box.x2 / width)
+        y2 = min(1.0, obj.box.y2 / height)
+        if x1 >= x2 or y1 >= y2:
+            continue
+        xmin.append(x1)
+        ymin.append(y1)
+        xmax.append(x2)
+        ymax.append(y2)
+        classes_text.append(obj.type.name.lower().encode("utf8"))
+        classes.append(label_map.id_of(obj.type.name.lower()))
+
+    return Example(
+        features=Features(
+            feature={
+                "image/filename": bytes_feature(image_name.encode("utf8")),
+                "image/source_id": bytes_feature(image_name.encode("utf8")),
+                "image/height": int64_feature(height),
+                "image/width": int64_feature(width),
+                "image/key/sha256": bytes_feature(key.encode("utf8")),
+                "image/encoded": bytes_feature(encoded_jpg),
+                "image/format": bytes_feature("jpeg".encode("utf8")),
+                "image/object/bbox/xmin": float_list_feature(xmin),
+                "image/object/bbox/xmax": float_list_feature(xmax),
+                "image/object/bbox/ymin": float_list_feature(ymin),
+                "image/object/bbox/ymax": float_list_feature(ymax),
+                "image/object/class/text": bytes_list_feature(classes_text),
+                "image/object/class/label": int64_list_feature(classes),
+            }
         )
+    )
 
 
 # Functions inspired from
diff --git a/src/research/dataset/twitch/mask_detector.py b/src/research/dataset/twitch/mask_detector.py
index 4d63b526c8dda29406ba90f826450a5440f6b773..111aa609ea8af63bf403f5e456e9b4c9c19e6b49 100644
--- a/src/research/dataset/twitch/mask_detector.py
+++ b/src/research/dataset/twitch/mask_detector.py
@@ -4,7 +4,7 @@ import numpy as np
 
 from polystar.models.image import Image, load_image
 
-DIR_PATH = Path(__file__).parent
+MASKS_DIR = Path(__file__).parent / "masks"
 
 
 class Mask:
@@ -20,14 +20,14 @@ class Mask:
         return value <= self._threshold
 
 
-robot_view_mask_hd = Mask(DIR_PATH / "mask_robot_view_hd.jpg", 20)
-aerial_view_mask_red_hd = Mask(DIR_PATH / "mask_aerial_red_hd.jpg", 15)
-aerial_view_mask_red_2_hd = Mask(DIR_PATH / "mask_aerial_red_2_hd.jpg", 15)
-aerial_view_mask_blue_hd = Mask(DIR_PATH / "mask_aerial_blue_hd.jpg", 15)
-aerial_view_mask_blue_2_hd = Mask(DIR_PATH / "mask_aerial_blue_2_hd.jpg", 15)
-bonus_view_mask_hd = Mask(DIR_PATH / "mask_bonus.jpg", 20)
-bonus_2_view_mask_hd = Mask(DIR_PATH / "mask_bonus_2.jpg", 20)
-bonus_3_view_mask_hd = Mask(DIR_PATH / "mask_bonus_3.jpg", 20)
+robot_view_mask_hd = Mask(MASKS_DIR / "mask_robot_view_hd.jpg", 20)
+aerial_view_mask_red_hd = Mask(MASKS_DIR / "mask_aerial_red_hd.jpg", 15)
+aerial_view_mask_red_2_hd = Mask(MASKS_DIR / "mask_aerial_red_2_hd.jpg", 15)
+aerial_view_mask_blue_hd = Mask(MASKS_DIR / "mask_aerial_blue_hd.jpg", 15)
+aerial_view_mask_blue_2_hd = Mask(MASKS_DIR / "mask_aerial_blue_2_hd.jpg", 15)
+bonus_view_mask_hd = Mask(MASKS_DIR / "mask_bonus.jpg", 20)
+bonus_2_view_mask_hd = Mask(MASKS_DIR / "mask_bonus_2.jpg", 20)
+bonus_3_view_mask_hd = Mask(MASKS_DIR / "mask_bonus_3.jpg", 20)
 
 
 def is_aerial_view(image: Image) -> bool:
@@ -41,11 +41,3 @@ def is_aerial_view(image: Image) -> bool:
 
 def has_bonus_icon(image: Image) -> bool:
     return bonus_view_mask_hd.match(image) or bonus_2_view_mask_hd.match(image) or bonus_3_view_mask_hd.match(image)
-
-
-if __name__ == "__main__":
-    has_bonus_icon(
-        load_image(
-            Path("/Users/cytadel/polystar/cv-code/dataset/twitch/robots-views/470152932/470152932-frame-007460.jpg")
-        )
-    )
diff --git a/src/research/dataset/twitch/mask_aerial.jpg b/src/research/dataset/twitch/masks/mask_aerial.jpg
similarity index 100%
rename from src/research/dataset/twitch/mask_aerial.jpg
rename to src/research/dataset/twitch/masks/mask_aerial.jpg
diff --git a/src/research/dataset/twitch/mask_aerial_blue_2_hd.jpg b/src/research/dataset/twitch/masks/mask_aerial_blue_2_hd.jpg
similarity index 100%
rename from src/research/dataset/twitch/mask_aerial_blue_2_hd.jpg
rename to src/research/dataset/twitch/masks/mask_aerial_blue_2_hd.jpg
diff --git a/src/research/dataset/twitch/mask_aerial_blue_hd.jpg b/src/research/dataset/twitch/masks/mask_aerial_blue_hd.jpg
similarity index 100%
rename from src/research/dataset/twitch/mask_aerial_blue_hd.jpg
rename to src/research/dataset/twitch/masks/mask_aerial_blue_hd.jpg
diff --git a/src/research/dataset/twitch/mask_aerial_red_2_hd.jpg b/src/research/dataset/twitch/masks/mask_aerial_red_2_hd.jpg
similarity index 100%
rename from src/research/dataset/twitch/mask_aerial_red_2_hd.jpg
rename to src/research/dataset/twitch/masks/mask_aerial_red_2_hd.jpg
diff --git a/src/research/dataset/twitch/mask_aerial_red_hd.jpg b/src/research/dataset/twitch/masks/mask_aerial_red_hd.jpg
similarity index 100%
rename from src/research/dataset/twitch/mask_aerial_red_hd.jpg
rename to src/research/dataset/twitch/masks/mask_aerial_red_hd.jpg
diff --git a/src/research/dataset/twitch/mask_bonus.jpg b/src/research/dataset/twitch/masks/mask_bonus.jpg
similarity index 100%
rename from src/research/dataset/twitch/mask_bonus.jpg
rename to src/research/dataset/twitch/masks/mask_bonus.jpg
diff --git a/src/research/dataset/twitch/mask_bonus_2.jpg b/src/research/dataset/twitch/masks/mask_bonus_2.jpg
similarity index 100%
rename from src/research/dataset/twitch/mask_bonus_2.jpg
rename to src/research/dataset/twitch/masks/mask_bonus_2.jpg
diff --git a/src/research/dataset/twitch/mask_bonus_3.jpg b/src/research/dataset/twitch/masks/mask_bonus_3.jpg
similarity index 100%
rename from src/research/dataset/twitch/mask_bonus_3.jpg
rename to src/research/dataset/twitch/masks/mask_bonus_3.jpg
diff --git a/src/research/dataset/twitch/mask_robot_view.jpg b/src/research/dataset/twitch/masks/mask_robot_view.jpg
similarity index 100%
rename from src/research/dataset/twitch/mask_robot_view.jpg
rename to src/research/dataset/twitch/masks/mask_robot_view.jpg
diff --git a/src/research/dataset/twitch/mask_robot_view_hd.jpg b/src/research/dataset/twitch/masks/mask_robot_view_hd.jpg
similarity index 100%
rename from src/research/dataset/twitch/mask_robot_view_hd.jpg
rename to src/research/dataset/twitch/masks/mask_robot_view_hd.jpg
diff --git a/src/research/dataset/twitch/resize_mask.py b/src/research/dataset/twitch/resize_mask.py
deleted file mode 100644
index 238629956b3c4f23043b34287016a14f9b7c5c57..0000000000000000000000000000000000000000
--- a/src/research/dataset/twitch/resize_mask.py
+++ /dev/null
@@ -1,7 +0,0 @@
-from pathlib import Path
-
-from imutils import resize
-
-from polystar.models.image import load_image, save_image
-
-save_image(resize(load_image(Path("mask_aerial.jpg")), 1920, 1080), Path("mask_aerial_red_hd.jpg"))
diff --git a/src/research/roco_detection/__init__.py b/src/research/roco_detection/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/research/roco_detection/robots_dataset.py b/src/research/roco_detection/robots_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..d80955cfa72c828375035201843d4897211f5b69
--- /dev/null
+++ b/src/research/roco_detection/robots_dataset.py
@@ -0,0 +1,37 @@
+from polystar.filters.filter_abc import FilterABC
+from polystar.target_pipeline.objects_filters.in_box_filter import InBoxObjectFilter
+from polystar.target_pipeline.objects_filters.type_object_filter import ARMORS_FILTER
+from polystar.view.plt_results_viewer import PltResultViewer
+from research.common.datasets.roco.roco_annotation import ROCOAnnotation
+from research.common.datasets.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo
+from research.roco_detection.small_base_filter import SMALL_BASE_FILTER
+
+
+def clear_small_bases(annotation: ROCOAnnotation):
+    small_bases, annotation.objects = SMALL_BASE_FILTER.split(annotation.objects)
+
+    if not small_bases:
+        return
+
+    armors, robots = ARMORS_FILTER.split(annotation.objects)
+    for base in small_bases:
+        armors = (-InBoxObjectFilter(base.box, 0.5)).filter(armors)
+    annotation.objects = robots + armors
+
+
+class AnnotationHasObjectsFilter(FilterABC[ROCOAnnotation]):
+    def validate_single(self, annotation: ROCOAnnotation) -> bool:
+        return bool(annotation.objects)
+
+
+if __name__ == "__main__":
+    for _img, _annotation, _name in (
+        (ROCODatasetsZoo.TWITCH.T470149066 | ROCODatasetsZoo.TWITCH.T470149568)
+        .shuffle()
+        .cap(10)
+        .to_air()
+        .filter_targets(AnnotationHasObjectsFilter())
+        .cap(30)
+    ):
+        with PltResultViewer(_name) as _viewer:
+            _viewer.display_image_with_objects(_img, _annotation.objects)
diff --git a/src/research/roco_detection/scripts/create_air_datasets.py b/src/research/roco_detection/scripts/create_air_datasets.py
new file mode 100644
index 0000000000000000000000000000000000000000..d6a3d6ac0b5382fa6a36880f1a7afefa58b16830
--- /dev/null
+++ b/src/research/roco_detection/scripts/create_air_datasets.py
@@ -0,0 +1,7 @@
+from polystar.view.plt_results_viewer import PltResultViewer
+from research.common.datasets.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo
+
+if __name__ == "__main__":
+    for _img, _annotation, _name in ROCODatasetsZoo.TWITCH.T470149066.to_air().to_images().shuffle().cap(10):
+        with PltResultViewer(_name) as _v:
+            _v.display_image_with_objects(_img, _annotation.objects)
diff --git a/src/research/roco_detection/scripts/create_air_tensorflow_records.py b/src/research/roco_detection/scripts/create_air_tensorflow_records.py
new file mode 100644
index 0000000000000000000000000000000000000000..94ff1bc2ccac5e5bc8359cada450c8e209a2ed50
--- /dev/null
+++ b/src/research/roco_detection/scripts/create_air_tensorflow_records.py
@@ -0,0 +1,26 @@
+from typing import Iterable, List
+
+from research.common.constants import TENSORFLOW_RECORDS_DIR
+from research.common.datasets.roco.roco_dataset_builder import ROCODatasetBuilder
+from research.common.datasets.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo
+from research.dataset.tensorflow_record import ROCOTensorflowRecordFactory
+from research.roco_detection.robots_dataset import AnnotationHasObjectsFilter
+
+
+class AirTensorflowRecordFactory(ROCOTensorflowRecordFactory):
+    """Record factory applying `to_air()` and an `AnnotationHasObjectsFilter` target filter to each builder."""
+
+    RECORDS_DIR = TENSORFLOW_RECORDS_DIR / "air"
+
+    def from_builders(self, builders: Iterable[ROCODatasetBuilder], prefix: str = ""):
+        # Same parameter type as the parent method: any iterable of builders, not only a list.
+        air_builders = (b.to_air().filter_targets(AnnotationHasObjectsFilter()) for b in builders)
+        super().from_builders(air_builders, prefix)
+
+
+if __name__ == "__main__":
+    _factory = AirTensorflowRecordFactory()
+    _factory.from_builders(ROCODatasetsZoo.TWITCH_TRAIN_DATASETS, "Twitch2_Train")
+    _factory.from_builders(ROCODatasetsZoo.TWITCH_VALIDATION_DATASETS, "Twitch2_Val")
+    _factory.from_builders(ROCODatasetsZoo.TWITCH_TEST_DATASETS, "Twitch2_Test")
+    _factory.from_builders(ROCODatasetsZoo.DJI, "Dji")
diff --git a/src/research/roco_detection/small_base_filter.py b/src/research/roco_detection/small_base_filter.py
new file mode 100644
index 0000000000000000000000000000000000000000..b170706884d5e6a8ffe68fb4c5fea50cf25cf7d4
--- /dev/null
+++ b/src/research/roco_detection/small_base_filter.py
@@ -0,0 +1,6 @@
+from polystar.models.roco_object import ObjectType
+from polystar.target_pipeline.objects_filters.objects_filter_abc import ObjectsFilterABC
+from polystar.target_pipeline.objects_filters.size_filter import SmallObjectFilter
+from polystar.target_pipeline.objects_filters.type_object_filter import TypeObjectsFilter
+
+SMALL_BASE_FILTER: ObjectsFilterABC = -TypeObjectsFilter({ObjectType.BASE}) | SmallObjectFilter(12_500)