diff --git a/common/polystar/common/filters/pass_through_filter.py b/common/polystar/common/filters/pass_through_filter.py new file mode 100644 index 0000000000000000000000000000000000000000..0096a3557e59dfebbe00c651380dc190f0046535 --- /dev/null +++ b/common/polystar/common/filters/pass_through_filter.py @@ -0,0 +1,8 @@ +from typing import Any + +from polystar.common.filters.filter_abc import FilterABC + + +class PassThroughFilter(FilterABC[Any]): + def validate_single(self, example: Any) -> bool: + return True diff --git a/common/research/common/dataset/improvement/zoom.py b/common/research/common/dataset/improvement/zoom.py index 9404b43d1e115abf71d1644ff189c33e6284ce1f..803a759a498bb0303be4540ab313570aa37511fb 100644 --- a/common/research/common/dataset/improvement/zoom.py +++ b/common/research/common/dataset/improvement/zoom.py @@ -1,13 +1,14 @@ from copy import copy from dataclasses import dataclass +from itertools import islice from typing import Iterable, List, Tuple from polystar.common.models.box import Box from polystar.common.models.image import Image from polystar.common.target_pipeline.objects_validators.in_box_validator import InBoxValidator from polystar.common.view.plt_results_viewer import PltResultViewer -from research.common.datasets.roco.roco_annotation import ROCOAnnotation -from research.common.datasets.roco.zoo.roco_datasets_zoo import ROCODatasetsZoo +from research.common.datasets_v3.roco.roco_annotation import ROCOAnnotation +from research.common.datasets_v3.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo def crop_image_annotation( @@ -119,13 +120,10 @@ class Zoomer: if __name__ == "__main__": - zoomer = Zoomer(854, 480, 0.15, 0.5) + _zoomer = Zoomer(854, 480, 0.15, 0.5) - for k, (img, annot) in enumerate(ROCODatasetsZoo.DJI.NorthChina): - viewer = PltResultViewer(f"img {i}") + for _img, _annot, _name in islice(ROCODatasetsZoo.DJI.NORTH_CHINA.lazy(), 0, 3): + _viewer = PltResultViewer(f"img {_name}") - for (cropped_image, cropped_annotation) in zoomer.zoom(img, annot): - viewer.display_image_with_objects(cropped_image, cropped_annotation.objects) - - if k == 2: - break + for (_cropped_image, _cropped_annotation, _cropped_name) in _zoomer.zoom(_img, _annot, _name): + _viewer.display_image_with_objects(_cropped_image, _cropped_annotation.objects) diff --git a/common/research/common/dataset/tensorflow_record.py b/common/research/common/dataset/tensorflow_record.py index 9fb7067ab579777118c3adfd7b06a427ef3a834b..e7ab09450dcde67e30d57ff68819517cbedfbff8 100644 --- a/common/research/common/dataset/tensorflow_record.py +++ b/common/research/common/dataset/tensorflow_record.py @@ -4,24 +4,24 @@ from shutil import move from typing import List import tensorflow as tf +from tensorflow_core.python.lib.io import python_io + from polystar.common.models.label_map import label_map +from polystar.common.utils.tqdm import smart_tqdm from research.common.constants import TENSORFLOW_RECORDS_DIR -from research.common.datasets.roco.directory_roco_dataset import \ - DirectoryROCODataset -from research.common.datasets.roco.roco_annotation import ROCOAnnotation -from tensorflow_core.python.lib.io import python_io -from tqdm import tqdm +from research.common.datasets_v3.roco.roco_annotation import ROCOAnnotation +from research.common.datasets_v3.roco.roco_datasets import ROCODatasets class TensorflowRecordFactory: @staticmethod - def from_datasets(datasets: List[DirectoryROCODataset], prefix: str = ""): + def from_datasets(datasets: List[ROCODatasets], prefix: str = ""): record_name = prefix + "_".join(d.name for d in datasets) writer = python_io.TFRecordWriter(str(TENSORFLOW_RECORDS_DIR / f"{record_name}.record")) c = 0 - for dataset in tqdm(datasets, desc=record_name, total=len(datasets), unit="dataset"): - for image_path, annotation, _ in tqdm( - dataset, desc=dataset.name, total=len(dataset), unit="img", leave=False + for dataset in smart_tqdm(datasets, desc=record_name, unit="dataset"): + for image_path, annotation, _ in smart_tqdm( + dataset.lazy_files(), desc=dataset.name, unit="img", leave=False ): writer.write(_example_from_image_annotation(image_path, annotation).SerializeToString()) c += 1 @@ -32,7 +32,7 @@ class TensorflowRecordFactory: ) @staticmethod - def from_dataset(dataset: DirectoryROCODataset, prefix: str = ""): + def from_dataset(dataset: ROCODatasets, prefix: str = ""): TensorflowRecordFactory.from_datasets([dataset], prefix) diff --git a/common/research/common/datasets/__init__.py b/common/research/common/datasets/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/common/research/common/datasets/dataset.py b/common/research/common/datasets/dataset.py deleted file mode 100644 index 74376878642bee2293baf12495e1e91c7e65875c..0000000000000000000000000000000000000000 --- a/common/research/common/datasets/dataset.py +++ /dev/null @@ -1,127 +0,0 @@ -from abc import ABC, abstractmethod -from collections import deque -from typing import Callable, Generic, Iterable, Iterator, Tuple, TypeVar - -from more_itertools import ilen -from polystar.common.utils.iterable_utils import smart_len -from polystar.common.utils.misc import identity - -ExampleT = TypeVar("ExampleT") -TargetT = TypeVar("TargetT") -ExampleU = TypeVar("ExampleU") -TargetU = TypeVar("TargetU") - - -class Dataset(Generic[ExampleT, TargetT], Iterable[Tuple[ExampleT, TargetT, str]], ABC): - def __init__(self, name: str): - self.name = name - - @property - @abstractmethod - def examples(self) -> Iterable[ExampleT]: - pass - - @property - @abstractmethod - def targets(self) -> Iterable[TargetT]: - pass - - @property - @abstractmethod - def names(self) -> Iterable[TargetT]: - pass - - @abstractmethod - def __iter__(self) -> Iterator[Tuple[ExampleT, TargetT, str]]: - pass - - @abstractmethod - def __len__(self): - pass - - def transform_examples(self, example_transformer: Callable[[ExampleT], ExampleU]) -> "Dataset[ExampleU, TargetT]": - return self.transform(example_transformer, identity) - - def transform_targets( - self, target_transformer: Callable[[TargetT], TargetU] = identity - ) -> "Dataset[ExampleT, TargetU]": - return self.transform(identity, target_transformer) - - def transform( - self, example_transformer: Callable[[ExampleT], ExampleU], target_transformer: Callable[[TargetT], TargetU] - ) -> "Dataset[ExampleU, TargetU]": - return GeneratorDataset( - self.name, - lambda: ( - (example_transformer(example), target_transformer(target), name) for example, target, name in self - ), - ) - - def __str__(self): - return f"<{self.__class__.__name__} {self.name}>" - - __repr__ = __str__ - - def check_consistency(self): - assert smart_len(self.targets) == smart_len(self.examples) == smart_len(self.names) - - -class LazyUnzipper: - def __init__(self, iterator: Iterator[Tuple], n: int): - self._iterator = iterator - self._memory = [deque() for _ in range(n)] - - def empty(self, i: int): - return self._iterator is None and not self._memory[i] - - def elements(self, i: int): - while True: - if self._memory[i]: - yield self._memory[i].popleft() - elif self._iterator is None: - return - else: - try: - elements = next(self._iterator) - for k in range(len(elements)): - if k != i: - self._memory[k].append(elements[k]) - yield elements[i] - except StopIteration: - self._iterator = None - return - - -class LazyDataset(Dataset[ExampleT, TargetT], ABC): - def __init__(self, name: str): - super().__init__(name) - self._unzipper = None - - @property - def examples(self) -> Iterable[ExampleT]: - return self._elements(0) - - @property - def targets(self) -> Iterable[TargetT]: - return self._elements(1) - - @property - def names(self) -> Iterable[str]: - return self._elements(2) - - def __len__(self): - return ilen(self) - - def _elements(self, i: int) -> Iterable: - if self._unzipper is None or self._unzipper.empty(i): - self._unzipper = LazyUnzipper(iter(self), 3) - return self._unzipper.elements(i) - - -class GeneratorDataset(LazyDataset[ExampleT, TargetT]): - def __init__(self, name: str, generator: Callable[[], Iterator[Tuple[ExampleT, TargetT, str]]]): - self.generator = generator - super().__init__(name) - - def __iter__(self) -> Iterator[Tuple[ExampleT, TargetT, str]]: - return self.generator() diff --git a/common/research/common/datasets/filtered_dataset.py b/common/research/common/datasets/filtered_dataset.py deleted file mode 100644 index cd34af143f685f5700a6188e83452c2de93e1976..0000000000000000000000000000000000000000 --- a/common/research/common/datasets/filtered_dataset.py +++ /dev/null @@ -1,19 +0,0 @@ -from polystar.common.filters.filter_abc import FilterABC -from research.common.datasets.dataset import Dataset, ExampleT, TargetT -from research.common.datasets.simple_dataset import SimpleDataset - - -class FilteredTargetsDataset(SimpleDataset[ExampleT, TargetT]): - def __init__(self, dataset: Dataset[ExampleT, TargetT], targets_filter: FilterABC[TargetT]): - targets, examples, names = targets_filter.filter_with_siblings( - list(dataset.targets), list(dataset.examples), list(dataset.names) - ) - super().__init__(examples, targets, names, dataset.name) - - -class FilteredExamplesDataset(SimpleDataset[ExampleT, TargetT]): - def __init__(self, dataset: Dataset[ExampleT, TargetT], examples_filter: FilterABC[ExampleT]): - super().__init__( - *examples_filter.filter_with_siblings(list(dataset.examples), list(dataset.targets), list(dataset.names)), - dataset.name, - ) diff --git a/common/research/common/datasets/image_dataset.py b/common/research/common/datasets/image_dataset.py deleted file mode 100644 index ad236b92e10f3f31119eb26e702032682f1f27fb..0000000000000000000000000000000000000000 --- a/common/research/common/datasets/image_dataset.py +++ /dev/null @@ -1,49 +0,0 @@ -from abc import ABC, abstractmethod -from pathlib import Path -from typing import Iterator, List, Tuple - -from memoized_property import memoized_property -from more_itertools import ilen -from polystar.common.models.image import Image, load_image -from research.common.datasets.dataset import Dataset, LazyDataset, TargetT - -ImageDataset = Dataset[Image, TargetT] - - -class ImageFileDataset(LazyDataset[Path, TargetT], ABC): - def __iter__(self) -> Iterator[Tuple[Path, TargetT, str]]: - for image_file in self.image_files: - yield image_file, self.target_from_image_file(image_file), image_file.stem - - @abstractmethod - def target_from_image_file(self, image_file: Path) -> TargetT: - pass - - @property - @abstractmethod - def image_files(self) -> Iterator[Path]: - pass - - def open(self) -> ImageDataset: - return open_file_dataset(self) - - def __len__(self): - return ilen(self.image_files) - - -def open_file_dataset(dataset: Dataset[Path, TargetT]) -> ImageDataset: - return dataset.transform_examples(load_image) - - -class ImageDirectoryDataset(ImageFileDataset[TargetT], ABC): - def __init__(self, images_dir: Path, name: str, extension: str = "jpg"): - super().__init__(name) - self.extension = extension - self.images_dir = images_dir - - @memoized_property - def image_files(self) -> List[Path]: - return list(sorted(self.images_dir.glob(f"*.{self.extension}"))) - - def __len__(self): - return len(self.image_files) diff --git a/common/research/common/datasets/roco/__init__.py b/common/research/common/datasets/roco/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/common/research/common/datasets/roco/directory_roco_dataset.py b/common/research/common/datasets/roco/directory_roco_dataset.py deleted file mode 100644 index 1b84b8f9b26f6f1860b5fbc32be11f3f1324b204..0000000000000000000000000000000000000000 --- a/common/research/common/datasets/roco/directory_roco_dataset.py +++ /dev/null @@ -1,24 +0,0 @@ -from pathlib import Path - -from polystar.common.models.image import Image, save_image -from research.common.datasets.image_dataset import ImageDirectoryDataset -from research.common.datasets.roco.roco_annotation import ROCOAnnotation - - -class DirectoryROCODataset(ImageDirectoryDataset[ROCOAnnotation]): - def __init__(self, dataset_path: Path, name: str): - super().__init__(dataset_path / "image", name) - self.main_dir = dataset_path - self.annotations_dir: Path = self.main_dir / "image_annotation" - - def target_from_image_file(self, image_file: Path) -> ROCOAnnotation: - return ROCOAnnotation.from_xml_file(self.annotations_dir / f"{image_file.stem}.xml") - - def create(self): - self.main_dir.mkdir(parents=True) - self.images_dir.mkdir() - self.annotations_dir.mkdir() - - def add(self, image: Image, annotation: ROCOAnnotation, name: str): - save_image(image, self.images_dir / f"{name}.jpg") - (self.annotations_dir / f"{name}.xml").write_text(annotation.to_xml()) diff --git a/common/research/common/datasets/roco/roco_dataset.py b/common/research/common/datasets/roco/roco_dataset.py deleted file mode 100644 index 0d58f5d9e232790e4e74aabe49899d0154418042..0000000000000000000000000000000000000000 --- a/common/research/common/datasets/roco/roco_dataset.py +++ /dev/null @@ -1,7 +0,0 @@ -from polystar.common.models.image import Image -from research.common.datasets.dataset import Dataset -from research.common.datasets.image_dataset import ImageFileDataset -from research.common.datasets.roco.roco_annotation import ROCOAnnotation - -ROCODataset = Dataset[Image, ROCOAnnotation] -ROCOFileDataset = ImageFileDataset[ROCOAnnotation] diff --git a/common/research/common/datasets/roco/roco_datasets.py b/common/research/common/datasets/roco/roco_datasets.py deleted file mode 100644 index 55056b5b916d2bdf4317788997d21144c3e37368..0000000000000000000000000000000000000000 --- a/common/research/common/datasets/roco/roco_datasets.py +++ /dev/null @@ -1,32 +0,0 @@ -from abc import abstractmethod -from pathlib import Path -from typing import Any, ClassVar, Iterable, Iterator, List, Tuple - -from research.common.datasets.roco.directory_roco_dataset import \ - DirectoryROCODataset - - -class ROCODatasets(Iterable[DirectoryROCODataset]): - name: ClassVar[str] - datasets: ClassVar[List[DirectoryROCODataset]] - directory: ClassVar[Path] - - @classmethod - @abstractmethod - def make_dataset(cls, dataset_name: str, *args: Any) -> DirectoryROCODataset: - pass - - def __init_subclass__(cls, **kwargs): - cls.datasets: List[DirectoryROCODataset] = [] - for dataset_name, args in cls.__dict__.items(): - if not dataset_name.islower(): - if not isinstance(args, Tuple): - args = (args,) - dataset = cls.make_dataset(dataset_name, *args) - setattr(cls, dataset_name, dataset) - cls.datasets.append(dataset) - - cls.name = cls.__name__[: -len("ROCODatasets")] - - def __iter__(self) -> Iterator[DirectoryROCODataset]: - return self.datasets.__iter__() diff --git a/common/research/common/datasets/roco/zoo/__init__.py b/common/research/common/datasets/roco/zoo/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/common/research/common/datasets/roco/zoo/dji.py b/common/research/common/datasets/roco/zoo/dji.py deleted file mode 100644 index 221430a7511d1583e907ea92c23f3197e5efeb80..0000000000000000000000000000000000000000 --- a/common/research/common/datasets/roco/zoo/dji.py +++ /dev/null @@ -1,17 +0,0 @@ -from research.common.constants import DJI_ROCO_DSET_DIR -from research.common.datasets.roco.directory_roco_dataset import \ - DirectoryROCODataset -from research.common.datasets.roco.roco_datasets import ROCODatasets - - -class DJIROCODatasets(ROCODatasets): - directory = DJI_ROCO_DSET_DIR - - CentralChina = "robomaster_Central China Regional Competition" - NorthChina = "robomaster_North China Regional Competition" - SouthChina = "robomaster_South China Regional Competition" - Final = "robomaster_Final Tournament" - - @classmethod - def make_dataset(cls, dataset_name: str, competition_name: str) -> DirectoryROCODataset: - return DirectoryROCODataset(cls.directory / competition_name, dataset_name) diff --git a/common/research/common/datasets/roco/zoo/dji_zoomed.py b/common/research/common/datasets/roco/zoo/dji_zoomed.py deleted file mode 100644 index 5d91133216b51424e07a9ebfa28452932ed5cc61..0000000000000000000000000000000000000000 --- a/common/research/common/datasets/roco/zoo/dji_zoomed.py +++ /dev/null @@ -1,20 +0,0 @@ -from typing import Any - -from polystar.common.utils.str_utils import camel2snake -from research.common.constants import DJI_ROCO_ZOOMED_DSET_DIR -from research.common.datasets.roco.directory_roco_dataset import \ - DirectoryROCODataset -from research.common.datasets.roco.roco_datasets import ROCODatasets - - -class DJIROCOZoomedDatasets(ROCODatasets): - directory = DJI_ROCO_ZOOMED_DSET_DIR - - CentralChina = () - NorthChina = () - SouthChina = () - Final = () - - @classmethod - def make_dataset(cls, dataset_name: str, *args: Any) -> DirectoryROCODataset: - return DirectoryROCODataset(cls.directory / camel2snake(dataset_name), f"{dataset_name}ZoomedV2") diff --git a/common/research/common/datasets/roco/zoo/roco_datasets_zoo.py b/common/research/common/datasets/roco/zoo/roco_datasets_zoo.py deleted file mode 100644 index 3de4a2822130553875c4c5e97b4c471aaebe177c..0000000000000000000000000000000000000000 --- a/common/research/common/datasets/roco/zoo/roco_datasets_zoo.py +++ /dev/null @@ -1,15 +0,0 @@ -from typing import Iterable - -from research.common.datasets.roco.roco_datasets import ROCODatasets -from research.common.datasets.roco.zoo.dji import DJIROCODatasets -from research.common.datasets.roco.zoo.dji_zoomed import DJIROCOZoomedDatasets -from research.common.datasets.roco.zoo.twitch import TwitchROCODatasets - - -class ROCODatasetsZoo(Iterable[ROCODatasets]): - DJI_ZOOMED = DJIROCOZoomedDatasets() - DJI = DJIROCODatasets() - TWITCH = TwitchROCODatasets() - - def __iter__(self): - return (self.DJI, self.DJI_ZOOMED, self.TWITCH).__iter__() diff --git a/common/research/common/datasets/roco/zoo/twitch.py b/common/research/common/datasets/roco/zoo/twitch.py deleted file mode 100644 index 12eac545f80b9a82b596c73ccac2a172e1b810a3..0000000000000000000000000000000000000000 --- a/common/research/common/datasets/roco/zoo/twitch.py +++ /dev/null @@ -1,24 +0,0 @@ -from typing import Any - -from research.common.constants import TWITCH_DSET_DIR -from research.common.datasets.roco.directory_roco_dataset import \ - DirectoryROCODataset -from research.common.datasets.roco.roco_datasets import ROCODatasets - - -class TwitchROCODatasets(ROCODatasets): - directory = TWITCH_DSET_DIR / "v1" - - T470149568 = () - T470150052 = () - T470151286 = () - T470152289 = () - T470152730 = () - T470152838 = () - T470153081 = () - T470158483 = () - - @classmethod - def make_dataset(cls, dataset_name: str, *args: Any) -> DirectoryROCODataset: - twitch_id = dataset_name[len("T") :] - return DirectoryROCODataset(cls.directory / twitch_id, dataset_name) diff --git a/common/research/common/datasets/simple_dataset.py b/common/research/common/datasets/simple_dataset.py deleted file mode 100644 index 8e6ee1eec2358dfa3e75f2407cb8c2b0b7afbda3..0000000000000000000000000000000000000000 --- a/common/research/common/datasets/simple_dataset.py +++ /dev/null @@ -1,30 +0,0 @@ -from typing import Iterable, Iterator, List, Tuple - -from research.common.datasets.dataset import Dataset, ExampleT, TargetT - - -class SimpleDataset(Dataset[ExampleT, TargetT]): - def __init__(self, examples: Iterable[ExampleT], targets: Iterable[TargetT], names: Iterable[str], name: str): - super().__init__(name) - self._examples = list(examples) - self._targets = list(targets) - self._names = list(names) - self.check_consistency() - - @property - def examples(self) -> List[ExampleT]: - return self._examples - - @property - def targets(self) -> List[TargetT]: - return self._targets - - @property - def names(self) -> List[TargetT]: - return self._names - - def __iter__(self) -> Iterator[Tuple[ExampleT, TargetT, str]]: - return zip(self.examples, self.targets, self.names) - - def __len__(self): - return len(self.examples) diff --git a/common/research/common/datasets/union_dataset.py b/common/research/common/datasets/union_dataset.py deleted file mode 100644 index 4967631ee60b3b173c41741ebef188c13b744f06..0000000000000000000000000000000000000000 --- a/common/research/common/datasets/union_dataset.py +++ /dev/null @@ -1,17 +0,0 @@ -from typing import Iterable, Iterator, Tuple - -from research.common.datasets.dataset import (Dataset, ExampleT, LazyDataset, - TargetT) - - -class UnionDataset(LazyDataset[ExampleT, TargetT]): - def __init__(self, datasets: Iterable[Dataset[ExampleT, TargetT]], name: str = None): - self.datasets = list(datasets) - super().__init__(name or "_".join(d.name for d in self.datasets)) - - def __iter__(self) -> Iterator[Tuple[ExampleT, TargetT, str]]: - for dataset in self.datasets: - yield from dataset - - def __len__(self): - return sum(map(len, self.datasets)) diff --git a/common/research/common/datasets_v3/capped_dataset.py b/common/research/common/datasets_v3/capped_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..9cbbef54605d4a039cb6254e151964d8881a2fe7 --- /dev/null +++ b/common/research/common/datasets_v3/capped_dataset.py @@ -0,0 +1,14 @@ +from itertools import islice +from typing import Iterator, Tuple + +from research.common.datasets_v3.lazy_dataset import ExampleT, LazyDataset, TargetT + + +class CappedDataset(LazyDataset): + def __init__(self, source: LazyDataset[ExampleT, TargetT], n: int): + super().__init__(source.name) + self.n = n + self.source = source + + def __iter__(self) -> Iterator[Tuple[ExampleT, TargetT]]: + return islice(self.source, self.n) diff --git a/common/research/common/datasets_v3/dataset_builder.py b/common/research/common/datasets_v3/dataset_builder.py index 2bb166597f87d99fba8d1219dbb63fccfe7d15e5..bae20b93516cec440b401c310cb7184232777369 100644 --- a/common/research/common/datasets_v3/dataset_builder.py +++ b/common/research/common/datasets_v3/dataset_builder.py @@ -3,6 +3,7 @@ from typing import Callable, Generic, Iterable, Iterator, Tuple from polystar.common.filters.filter_abc import FilterABC from polystar.common.filters.pass_through_filter import PassThroughFilter from polystar.common.utils.misc import identity +from research.common.datasets_v3.capped_dataset import CappedDataset from research.common.datasets_v3.dataset import Dataset from research.common.datasets_v3.filter_dataset import ExampleU, FilterDataset, TargetU from research.common.datasets_v3.lazy_dataset import ExampleT, LazyDataset, TargetT @@ -54,6 +55,10 @@ class DatasetBuilder(Generic[ExampleT, TargetT], Iterable[Tuple[ExampleT, Target self.dataset = TransformDataset(self.dataset, identity, target_transformer) return self + def cap(self, n: int) -> "DatasetBuilder[ExampleT, TargetT]": + self.dataset = CappedDataset(self.dataset, n) + return self + @property def name(self) -> str: return self.dataset.name diff --git a/common/research/common/datasets_v3/image_file_dataset_builder.py b/common/research/common/datasets_v3/image_file_dataset_builder.py index fbf86dfcfc3c9313cd10bc9fecda4b46eb5fd3e0..dbd5b18f44fab2f0d5c16869c8593549fe7e65bd 100644 --- a/common/research/common/datasets_v3/image_file_dataset_builder.py +++ b/common/research/common/datasets_v3/image_file_dataset_builder.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Callable, Iterable, Iterator, Tuple +from typing import Callable, Generic, Iterable, Iterator, Tuple from polystar.common.models.image import Image, load_image from research.common.datasets_v3.dataset_builder import DatasetBuilder @@ -20,7 +20,7 @@ class LazyFileDataset(LazyDataset[Path, TargetT]): return len(self.files) -class FileDatasetBuilder(DatasetBuilder[Path, TargetT]): +class FileDatasetBuilder(Generic[TargetT], DatasetBuilder[Path, TargetT]): def __init__(self, dataset: LazyFileDataset): super().__init__(dataset) @@ -28,7 +28,7 @@ class FileDatasetBuilder(DatasetBuilder[Path, TargetT]): return self.transform_examples(load_image) -class DirectoryDatasetBuilder(FileDatasetBuilder): +class DirectoryDatasetBuilder(FileDatasetBuilder[TargetT]): def __init__(self, directory: Path, target_from_file: Callable[[Path], TargetT], name: str, extension: str = "jpg"): super().__init__(LazyFileDataset(directory.glob(f"*.{extension}"), target_from_file, name)) self.images_dir = directory diff --git a/common/research/common/datasets_v3/iterator_dataset.py b/common/research/common/datasets_v3/iterator_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..7b583deffa6dd1fb5ac8cb6934b0cd017e0ac0e9 --- /dev/null +++ b/common/research/common/datasets_v3/iterator_dataset.py @@ -0,0 +1,12 @@ +from typing import Iterator, Tuple + +from research.common.datasets_v3.lazy_dataset import ExampleT, LazyDataset, TargetT + + +class IteratorDataset(LazyDataset[ExampleT, TargetT]): + def __init__(self, iterator: Iterator[Tuple[ExampleT, TargetT, str]], name: str): + super().__init__(name) + self.iterator = iterator + + def __iter__(self) -> Iterator[Tuple[ExampleT, TargetT, str]]: + return self.iterator diff --git a/common/research/common/datasets_v3/roco/roco_dataset_builder.py b/common/research/common/datasets_v3/roco/roco_dataset_builder.py index 73ebc12aa0d4d85502b291531ce3e8401e10f85f..4bd420764c60c6188d198f850c3745901b4f68a6 100644 --- a/common/research/common/datasets_v3/roco/roco_dataset_builder.py +++ b/common/research/common/datasets_v3/roco/roco_dataset_builder.py @@ -4,7 +4,7 @@ from research.common.datasets_v3.image_file_dataset_builder import DirectoryData from research.common.datasets_v3.roco.roco_annotation import ROCOAnnotation -class ROCODatasetBuilder(DirectoryDatasetBuilder): +class ROCODatasetBuilder(DirectoryDatasetBuilder[ROCOAnnotation]): def __init__(self, directory: Path, name: str, extension: str = "jpg"): super().__init__(directory / "image", self.roco_annotation_from_image_file, name, extension) self.annotations_dir = directory / "image_annotation" diff --git a/common/research/common/datasets_v3/roco/roco_datasets.py b/common/research/common/datasets_v3/roco/roco_datasets.py index d64b454f22d23fedcabc67cd5125a54ef47b8b86..ff5fa10debfc908cd5739e87be5ceea8ac3815b7 100644 --- a/common/research/common/datasets_v3/roco/roco_datasets.py +++ b/common/research/common/datasets_v3/roco/roco_datasets.py @@ -21,23 +21,23 @@ class ROCODatasets(Enum): self._dataset_dir_name = dataset_dir_name def lazy(self) -> LazyROCODataset: - return self._dataset_builder.to_images().build_lazy() + return self.builder.to_images().build_lazy() def lazy_files(self) -> LazyROCOFileDataset: - return self._dataset_builder.build_lazy() + return self.builder.build_lazy() def dataset(self) -> ROCODataset: - return self._dataset_builder.to_images().build() + return self.builder.to_images().build() def files_dataset(self) -> ROCOFileDataset: - return self._dataset_builder.build() + return self.builder.build() @property def main_dir(self): return self.datasets_dir() / self._dataset_dir_name @property - def _dataset_builder(self) -> ROCODatasetBuilder: + def builder(self) -> ROCODatasetBuilder: return ROCODatasetBuilder(self.main_dir, self.dataset_name) @classmethod diff --git a/common/tests/common/unittests/datasets/roco/test_directory_dataset_zoo.py b/common/tests/common/unittests/datasets/roco/test_directory_dataset_zoo.py index fbb37073dd6f19c8b17ecacd2e0ac7360ccf03c4..b82b017778a5573507c9f8b4dab9e6b0a0a69da9 100644 --- a/common/tests/common/unittests/datasets/roco/test_directory_dataset_zoo.py +++ b/common/tests/common/unittests/datasets/roco/test_directory_dataset_zoo.py @@ -4,10 +4,10 @@ from unittest import TestCase from numpy import asarray, float32 from numpy.testing import assert_array_almost_equal + from polystar.common.models.image import save_image -from research.common.datasets.roco.directory_roco_dataset import \ - DirectoryROCODataset -from research.common.datasets.roco.roco_annotation import ROCOAnnotation +from research.common.datasets.roco.directory_roco_dataset import DirectoryROCODataset +from research.common.datasets_v3.roco.roco_annotation import ROCOAnnotation class TestDirectoryROCODataset(TestCase): diff --git a/common/tests/common/unittests/datasets_v3/roco/test_directory_dataset_zoo.py b/common/tests/common/unittests/datasets_v3/roco/test_directory_dataset_zoo.py new file mode 100644 index 0000000000000000000000000000000000000000..90dabb91cee25899702a8210b67b43be5af56237 --- /dev/null +++ b/common/tests/common/unittests/datasets_v3/roco/test_directory_dataset_zoo.py @@ -0,0 +1,54 @@ +from pathlib import Path +from tempfile import TemporaryDirectory +from typing import Tuple +from unittest import TestCase + +from numpy import asarray, float32 +from numpy.testing import assert_array_almost_equal + +from polystar.common.models.image import save_image +from research.common.datasets_v3.roco.roco_annotation import ROCOAnnotation +from research.common.datasets_v3.roco.roco_dataset_builder import ROCODatasetBuilder + + +class TestDirectoryROCODataset(TestCase): + def test_file(self): + with TemporaryDirectory() as dataset_dir: + dataset_dir = Path(dataset_dir) + + annotation = ROCOAnnotation(objects=[], has_rune=False, w=160, h=90) + + images_dir, annotations_dir = self._setup_dir(dataset_dir) + + (annotations_dir / "frame_1.xml").write_text(annotation.to_xml()) + (images_dir / "frame_1.jpg").write_text("") + + dataset = ROCODatasetBuilder(dataset_dir, "fake").build_lazy() + self.assertEqual([(images_dir / "frame_1.jpg", annotation, "frame_1")], list(dataset)) + + def test_image(self): + with TemporaryDirectory() as dataset_dir: + dataset_dir = Path(dataset_dir) + + annotation = ROCOAnnotation(objects=[], has_rune=False, w=160, h=90) + image = asarray([[[250, 0, 0], [250, 0, 0]], [[250, 0, 0], [250, 0, 0]]]).astype(float32) + + images_dir, annotations_dir = self._setup_dir(dataset_dir) + + (annotations_dir / "frame_1.xml").write_text(annotation.to_xml()) + save_image(image, images_dir / "frame_1.jpg") + + dataset = ROCODatasetBuilder(dataset_dir, "fake").to_images().build() + self.assertEqual([annotation], list(dataset.targets)) + images = list(dataset.examples) + self.assertEqual(1, len(images)) + assert_array_almost_equal(image / 256, images[0] / 256, decimal=2) # jpeg precision + + def _setup_dir(self, dataset_dir: Path) -> Tuple[Path, Path]: + images_dir = dataset_dir / "image" + annotations_dir = dataset_dir / "image_annotation" + + annotations_dir.mkdir() + images_dir.mkdir() + + return images_dir, annotations_dir diff --git a/common/tests/common/unittests/datasets_v3/test_dataset.py b/common/tests/common/unittests/datasets_v3/test_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..cf34bbaa6e5d5e8a238f787c30be254edca4b316 --- /dev/null +++ b/common/tests/common/unittests/datasets_v3/test_dataset.py @@ -0,0 +1,101 @@ +from unittest import TestCase +from unittest.mock import MagicMock + +from research.common.datasets.dataset import Dataset, LazyDataset +from research.common.datasets.simple_dataset import SimpleDataset + + +class TestDataset(TestCase): + def test_transform(self): + dataset = _make_fake_dataset() + + str_str_dataset: Dataset[str, str] = dataset.transform(str, str) + + self.assertEqual( + [("0", "8", "data_1"), ("1", "9", "data_2"), ("2", "10", "data_3"), ("3", "11", "data_4")], + list(str_str_dataset), + ) + + def test_transform_examples(self): + dataset = _make_fake_dataset() + + str_int_dataset: Dataset[str, int] = dataset.transform_examples(str) + + self.assertEqual( + [("0", 8, "data_1"), ("1", 9, "data_2"), ("2", 10, "data_3"), ("3", 11, "data_4")], list(str_int_dataset) + ) + + def test_transform_not_exhaustible(self): + dataset = _make_fake_dataset() + + str_int_dataset: Dataset[str, float] = dataset.transform_examples(str) + + items = [("0", 8, "data_1"), ("1", 9, "data_2"), ("2", 10, "data_3"), ("3", 11, "data_4")] + + self.assertEqual(items, list(str_int_dataset)) + self.assertEqual(items, list(str_int_dataset)) + self.assertEqual(items, list(str_int_dataset)) + + +class TestSimpleDataset(TestCase): + def test_properties(self): + dataset = _make_fake_dataset() + + self.assertEqual([0, 1, 2, 3], dataset.examples) + self.assertEqual([8, 9, 10, 11], dataset.targets) + self.assertEqual(["data_1", "data_2", "data_3", "data_4"], dataset.names) + + def test_iter(self): + dataset = _make_fake_dataset() + + self.assertEqual([(0, 8, "data_1"), (1, 9, "data_2"), (2, 10, "data_3"), (3, 11, "data_4")], list(dataset)) + + def test_len(self): + dataset = _make_fake_dataset() + + self.assertEqual(4, len(dataset)) + + def test_consistency(self): + with self.assertRaises(AssertionError): + SimpleDataset([0, 1], [8, 9, 10, 11], ["a", "b"], "fake") + + +class FakeLazyDataset(LazyDataset): + def __init__(self): + super().__init__("fake") + + __iter__ = MagicMock(side_effect=lambda *args: iter([(1, 1, "data_1"), (2, 4, "data_2"), (3, 9, "data_3")])) + + +class TestLazyDataset(TestCase): + def test_properties(self): + dataset = FakeLazyDataset() + + self.assertEqual([1, 2, 3], list(dataset.examples)) + self.assertEqual([1, 4, 9], list(dataset.targets)) + self.assertEqual( + [(1, 1, "data_1"), (2, 4, "data_2"), (3, 9, "data_3")], + list(zip(dataset.examples, dataset.targets, dataset.names)), + ) + + def test_properties_laziness(self): + FakeLazyDataset.__iter__.reset_mock() + dataset = FakeLazyDataset() + + list(dataset.examples) + list(dataset.targets) + FakeLazyDataset.__iter__.assert_called_once() + + FakeLazyDataset.__iter__.reset_mock() + + list(zip(dataset.examples, dataset.targets)) + FakeLazyDataset.__iter__.assert_called_once() + + FakeLazyDataset.__iter__.reset_mock() + + list(dataset.names) + FakeLazyDataset.__iter__.assert_not_called() + + +def _make_fake_dataset() -> Dataset[int, int]: + return SimpleDataset([0, 1, 2, 3], [8, 9, 10, 11], [f"data_{i}" for i in range(1, 5)], "fake") diff --git a/robots-at-robots/research/robots_at_robots/armor_color/armor_color_dataset.py b/robots-at-robots/research/robots_at_robots/armor_color/armor_color_dataset.py index c69b65d4293fbfbe0b058b79d16a049fe7da28a9..052232db6b758ed7b9b03ebdb2b01021e35a2f96 100644 --- a/robots-at-robots/research/robots_at_robots/armor_color/armor_color_dataset.py +++ b/robots-at-robots/research/robots_at_robots/armor_color/armor_color_dataset.py @@ -1,37 +1,26 @@ from itertools import islice -from pathlib import Path -import matplotlib.pyplot as plt from polystar.common.models.object import Armor -from research.common.datasets.dataset import Dataset -from research.common.datasets.image_dataset import open_file_dataset -from research.common.datasets.roco.zoo.roco_datasets_zoo import ROCODatasetsZoo -from research.robots_at_robots.dataset.armor_value_dataset import ( - ArmorValueDatasetCache, ArmorValueDirectoryDataset) +from research.common.datasets_v3.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo +from research.robots_at_robots.dataset.armor_value_dataset_generator import ArmorValueDatasetGenerator +from research.robots_at_robots.dataset.armor_value_target_factory import ArmorValueTargetFactory -class ArmorColorDirectoryDataset(ArmorValueDirectoryDataset[str]): - @staticmethod - def _value_from_str(label: str) -> str: +class ArmorColorTargetFactory(ArmorValueTargetFactory[str]): + def from_str(self, label: str) -> str: return label - -class ArmorColorDatasetCache(ArmorValueDatasetCache[str]): - def __init__(self): - super().__init__("colors") - - def _value_from_armor(self, armor: Armor) -> str: + def from_armor(self, armor: Armor) -> str: return armor.color.name.lower() - def from_directory_and_name(self, directory: Path, name: str) -> Dataset[Path, str]: - return ArmorColorDirectoryDataset(directory, name) + +def make_armor_color_dataset_generator() -> ArmorValueDatasetGenerator[str]: + return ArmorValueDatasetGenerator("colors", ArmorColorTargetFactory()) if __name__ == "__main__": - _dataset = open_file_dataset(ArmorColorDatasetCache().from_roco_dataset(ROCODatasetsZoo.TWITCH.T470150052)) + _roco_dataset_builder = ROCODatasetsZoo.DJI.CENTRAL_CHINA.builder + _armor_color_dataset = make_armor_color_dataset_generator().from_roco_dataset(_roco_dataset_builder) - for _image, _value, _name in islice(_dataset, 40, 50): - print(_value) - plt.imshow(_image) - plt.show() - plt.clf() + for p, c, _name in islice(_armor_color_dataset, 20, 25): + print(p, c, _name) diff --git a/robots-at-robots/research/robots_at_robots/armor_color/armor_color_pipeline_reporter_factory.py b/robots-at-robots/research/robots_at_robots/armor_color/armor_color_pipeline_reporter_factory.py index 29a07852eac50408bbb2d4ca816c87e661a83029..6ddd9cd7fa351d690e5ff71725bd5af204453054 100644 --- a/robots-at-robots/research/robots_at_robots/armor_color/armor_color_pipeline_reporter_factory.py +++ b/robots-at-robots/research/robots_at_robots/armor_color/armor_color_pipeline_reporter_factory.py @@ -1,24 +1,19 @@ from typing import List -from research.common.dataset.directory_roco_dataset import DirectoryROCODataset -from research.robots_at_robots.armor_color.armor_color_dataset import \ - ArmorColorDatasetCache -from research.robots_at_robots.evaluation.image_pipeline_evaluation_reporter import \ - ImagePipelineEvaluationReporter -from research.robots_at_robots.evaluation.image_pipeline_evaluator import \ - ImagePipelineEvaluator +from research.common.datasets_v3.roco.roco_dataset_builder import ROCODatasetBuilder +from research.robots_at_robots.armor_color.armor_color_dataset import make_armor_color_dataset_generator +from research.robots_at_robots.evaluation.image_pipeline_evaluation_reporter import ImagePipelineEvaluationReporter +from research.robots_at_robots.evaluation.image_pipeline_evaluator import ImagePipelineEvaluator class ArmorColorPipelineReporterFactory: @staticmethod - def from_roco_datasets( - train_roco_datasets: List[DirectoryROCODataset], test_roco_datasets: List[DirectoryROCODataset] - ): + def from_roco_datasets(train_roco_datasets: List[ROCODatasetBuilder], test_roco_datasets: List[ROCODatasetBuilder]): return ImagePipelineEvaluationReporter( evaluator=ImagePipelineEvaluator( train_roco_datasets=train_roco_datasets, test_roco_datasets=test_roco_datasets, - image_dataset_cache=ArmorColorDatasetCache(), + image_dataset_generator=make_armor_color_dataset_generator(), ), evaluation_project="armor-color", ) diff --git a/robots-at-robots/research/robots_at_robots/armor_color/baseline_experiments.py b/robots-at-robots/research/robots_at_robots/armor_color/baseline_experiments.py index 5ee849cb03dc53f5b0731333d5ac2c4e4e7d45ca..e84a24c71a7c905fb1c0c6539a6136820609c1c7 100644 --- a/robots-at-robots/research/robots_at_robots/armor_color/baseline_experiments.py +++ b/robots-at-robots/research/robots_at_robots/armor_color/baseline_experiments.py @@ -1,22 +1,20 @@ import logging -from polystar.common.image_pipeline.classifier_image_pipeline import \ - ClassifierImagePipeline -from polystar.common.image_pipeline.image_featurizer.mean_rgb_channels_featurizer import \ - MeanChannelsFeaturizer +from polystar.common.image_pipeline.classifier_image_pipeline import ClassifierImagePipeline +from polystar.common.image_pipeline.image_featurizer.mean_rgb_channels_featurizer import MeanChannelsFeaturizer from polystar.common.image_pipeline.models.random_model import RandomModel -from polystar.common.image_pipeline.models.red_blue_channels_comparison_model import \ - RedBlueComparisonModel -from research.common.datasets.roco.zoo.roco_datasets_zoo import ROCODatasetsZoo -from research.robots_at_robots.armor_color.armor_color_pipeline_reporter_factory import \ - ArmorColorPipelineReporterFactory +from polystar.common.image_pipeline.models.red_blue_channels_comparison_model import RedBlueComparisonModel +from research.common.datasets_v3.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo +from research.robots_at_robots.armor_color.armor_color_pipeline_reporter_factory import ( + ArmorColorPipelineReporterFactory, +) if __name__ == "__main__": logging.getLogger().setLevel("INFO") reporter = ArmorColorPipelineReporterFactory.from_roco_datasets( - train_roco_datasets=[ROCODatasetsZoo.TWITCH.T470151286, ROCODatasetsZoo.TWITCH.T470150052], - test_roco_datasets=[ROCODatasetsZoo.TWITCH.T470152289], + train_roco_datasets=[ROCODatasetsZoo.TWITCH.T470151286.builder, ROCODatasetsZoo.TWITCH.T470150052.builder], + test_roco_datasets=[ROCODatasetsZoo.TWITCH.T470152289.builder], ) red_blue_comparison_pipeline = ClassifierImagePipeline( diff --git a/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_dataset.py b/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_dataset.py index 4b71926bd7d6ad32f19ffd2e0f268682cdf82b8d..9eb589c25264e363809cecb294beb5c0057a7e01 100644 --- a/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_dataset.py +++ b/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_dataset.py @@ -1,44 +1,28 @@ from itertools import islice -from pathlib import Path from typing import Iterable -import matplotlib.pyplot as plt from polystar.common.filters.keep_filter import KeepFilter from polystar.common.models.object import Armor -from research.common.datasets.dataset import Dataset -from research.common.datasets.filtered_dataset import FilteredTargetsDataset -from research.common.datasets.image_dataset import open_file_dataset -from research.common.datasets.roco.zoo.roco_datasets_zoo import ROCODatasetsZoo -from research.robots_at_robots.dataset.armor_value_dataset import ( - ArmorValueDatasetCache, ArmorValueDirectoryDataset) +from research.common.datasets_v3.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo +from research.robots_at_robots.dataset.armor_value_dataset_generator import ArmorValueDatasetGenerator +from research.robots_at_robots.dataset.armor_value_target_factory import ArmorValueTargetFactory -class ArmorDigitDirectoryDataset(ArmorValueDirectoryDataset[int]): - @staticmethod - def _value_from_str(label: str) -> int: +class ArmorDigitTargetFactory(ArmorValueTargetFactory[int]): + def from_str(self, label: str) -> int: return int(label) - -class ArmorDigitDatasetCache(ArmorValueDatasetCache[str]): - def __init__(self, acceptable_digits: Iterable[int]): - super().__init__("digits") - self.acceptable_digits = acceptable_digits - - def _value_from_armor(self, armor: Armor) -> int: + def from_armor(self, armor: Armor) -> int: return armor.number - def from_directory_and_name(self, directory: Path, name: str) -> Dataset[Path, int]: - full_dataset = ArmorDigitDirectoryDataset(directory, name) - return FilteredTargetsDataset(full_dataset, KeepFilter(self.acceptable_digits)) + +def make_armor_digit_dataset_generator(acceptable_digits: Iterable[int]) -> ArmorValueDatasetGenerator[int]: + return ArmorValueDatasetGenerator("digits", ArmorDigitTargetFactory(), KeepFilter(set(acceptable_digits))) if __name__ == "__main__": - _dataset = open_file_dataset( - ArmorDigitDatasetCache((1, 2, 3, 4, 5, 7)).from_roco_dataset(ROCODatasetsZoo.TWITCH.T470150052) - ) - - for _image, _value, _name in islice(_dataset, 40, 50): - print(_value) - plt.imshow(_image) - plt.show() - plt.clf() + _roco_dataset_builder = ROCODatasetsZoo.DJI.CENTRAL_CHINA.builder + _armor_digit_dataset = make_armor_digit_dataset_generator([1, 2]).from_roco_dataset(_roco_dataset_builder) + + for p, c, _name in islice(_armor_digit_dataset, 20, 30): + print(p, c, _name) diff --git a/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_pipeline_reporter_factory.py b/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_pipeline_reporter_factory.py index ba8ad9733298d62e1bb14f899f7098327b729a3f..9fcdc4025e701a0da66ee0f8c146d93b531df0c0 100644 --- a/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_pipeline_reporter_factory.py +++ b/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_pipeline_reporter_factory.py @@ -1,27 +1,23 @@ from typing import Iterable, List -from research.common.datasets.roco.directory_roco_dataset import \ - DirectoryROCODataset -from research.robots_at_robots.armor_digit.armor_digit_dataset import \ - ArmorDigitDatasetCache -from research.robots_at_robots.evaluation.image_pipeline_evaluation_reporter import \ - ImagePipelineEvaluationReporter -from research.robots_at_robots.evaluation.image_pipeline_evaluator import \ - ImagePipelineEvaluator +from research.common.datasets_v3.roco.roco_dataset_builder import ROCODatasetBuilder +from research.robots_at_robots.armor_digit.armor_digit_dataset import make_armor_digit_dataset_generator +from research.robots_at_robots.evaluation.image_pipeline_evaluation_reporter import ImagePipelineEvaluationReporter +from research.robots_at_robots.evaluation.image_pipeline_evaluator import ImagePipelineEvaluator class ArmorDigitPipelineReporterFactory: @staticmethod def from_roco_datasets( - train_roco_datasets: List[DirectoryROCODataset], - test_roco_datasets: List[DirectoryROCODataset], + train_roco_datasets: List[ROCODatasetBuilder], + test_roco_datasets: List[ROCODatasetBuilder], acceptable_digits: Iterable[int] = (1, 2, 3, 4, 5, 7), ): return ImagePipelineEvaluationReporter( evaluator=ImagePipelineEvaluator( train_roco_datasets=train_roco_datasets, test_roco_datasets=test_roco_datasets, - image_dataset_cache=ArmorDigitDatasetCache(acceptable_digits), + image_dataset_generator=make_armor_digit_dataset_generator(acceptable_digits), ), evaluation_project="armor-digit", ) diff --git a/robots-at-robots/research/robots_at_robots/armor_digit/baseline_experiments.py b/robots-at-robots/research/robots_at_robots/armor_digit/baseline_experiments.py index 132cb96f602998147415f0204b618c5c7a71e8a0..74ed55148a8fdb0add71986c72f0adf42c43c1bd 100644 --- a/robots-at-robots/research/robots_at_robots/armor_digit/baseline_experiments.py +++ b/robots-at-robots/research/robots_at_robots/armor_digit/baseline_experiments.py @@ -1,18 +1,18 @@ import logging -from polystar.common.image_pipeline.classifier_image_pipeline import \ - ClassifierImagePipeline +from polystar.common.image_pipeline.classifier_image_pipeline import ClassifierImagePipeline from polystar.common.image_pipeline.models.random_model import RandomModel -from research.common.datasets.roco.zoo.roco_datasets_zoo import ROCODatasetsZoo -from research.robots_at_robots.armor_digit.armor_digit_pipeline_reporter_factory import \ - ArmorDigitPipelineReporterFactory +from research.common.datasets_v3.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo +from research.robots_at_robots.armor_digit.armor_digit_pipeline_reporter_factory import ( + ArmorDigitPipelineReporterFactory, +) if __name__ == "__main__": logging.getLogger().setLevel("INFO") reporter = ArmorDigitPipelineReporterFactory.from_roco_datasets( - train_roco_datasets=[ROCODatasetsZoo.TWITCH.T470151286, ROCODatasetsZoo.TWITCH.T470150052], - test_roco_datasets=[ROCODatasetsZoo.TWITCH.T470152289], + train_roco_datasets=[ROCODatasetsZoo.TWITCH.T470151286.builder, ROCODatasetsZoo.TWITCH.T470150052.builder], + test_roco_datasets=[ROCODatasetsZoo.TWITCH.T470152289.builder], ) random_pipeline = ClassifierImagePipeline(model=RandomModel(), custom_name="random") diff --git a/robots-at-robots/research/robots_at_robots/dataset/armor_dataset_factory.py b/robots-at-robots/research/robots_at_robots/dataset/armor_dataset_factory.py index 10f1a6b5677010860474a3e968032e65544ef94e..5631ae1f56286e321d0864c4a1c68163b24ca994 100644 --- a/robots-at-robots/research/robots_at_robots/dataset/armor_dataset_factory.py +++ b/robots-at-robots/research/robots_at_robots/dataset/armor_dataset_factory.py @@ -2,28 +2,23 @@ from itertools import islice from typing import Iterator, List, Tuple import matplotlib.pyplot as plt + from polystar.common.models.image import Image from polystar.common.models.object import Armor, ObjectType -from polystar.common.target_pipeline.objects_validators.type_object_validator import \ - TypeObjectValidator -from research.common.datasets.dataset import Dataset, GeneratorDataset -from research.common.datasets.roco.roco_annotation import ROCOAnnotation -from research.common.datasets.roco.roco_dataset import (ROCODataset, - ROCOFileDataset) -from research.common.datasets.roco.zoo.roco_datasets_zoo import ROCODatasetsZoo - -ArmorDataset = Dataset[Image, Armor] - +from polystar.common.target_pipeline.objects_validators.type_object_validator import TypeObjectValidator +from research.common.datasets_v3.lazy_dataset import LazyDataset +from research.common.datasets_v3.roco.roco_annotation import ROCOAnnotation +from research.common.datasets_v3.roco.roco_dataset import LazyROCODataset +from research.common.datasets_v3.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo -class ArmorDatasetFactory: - def __init__(self, dataset: ROCOFileDataset): - self.dataset: ROCODataset = dataset.open() - def make(self) -> ArmorDataset: - return GeneratorDataset(f"{self.dataset.name}_armors", self._make_generator) +class ArmorDataset(LazyDataset[Image, Armor]): + def __init__(self, dataset: LazyROCODataset): + super().__init__(f"{dataset.name}_armors") + self.roco_dataset = dataset - def _make_generator(self) -> Iterator[Tuple[Image, Armor, str]]: - for image, annotation, name in self.dataset: + def __iter__(self) -> Iterator[Tuple[Image, Armor, str]]: + for image, annotation, name in self.roco_dataset: yield from self._generate_from_single(image, annotation, name) @staticmethod @@ -36,7 +31,7 @@ class ArmorDatasetFactory: if __name__ == "__main__": - for _armor_img, _armor, _name in islice(ArmorDatasetFactory(ROCODatasetsZoo.DJI.CentralChina).make(), 20, 30): + for _armor_img, _armor, _name in islice(ArmorDataset(ROCODatasetsZoo.DJI.CENTRAL_CHINA.lazy()), 20, 30): print(_name, repr(_armor)) plt.imshow(_armor_img) plt.show() diff --git a/robots-at-robots/research/robots_at_robots/dataset/armor_value_dataset.py b/robots-at-robots/research/robots_at_robots/dataset/armor_value_dataset.py deleted file mode 100644 index 2f36227d21ad7f3d732fefa5f2788d4d1ecaf7dc..0000000000000000000000000000000000000000 --- a/robots-at-robots/research/robots_at_robots/dataset/armor_value_dataset.py +++ /dev/null @@ -1,94 +0,0 @@ -import json -from abc import ABC, abstractmethod -from dataclasses import dataclass -from pathlib import Path -from shutil import rmtree -from typing import ClassVar, Generic, List, TypeVar - -from polystar.common.models.image import Image, save_image -from polystar.common.models.object import Armor -from polystar.common.utils.time import create_time_id -from polystar.common.utils.tqdm import smart_tqdm -from research.common.datasets.dataset import Dataset -from research.common.datasets.image_dataset import ImageDirectoryDataset -from research.common.datasets.roco.directory_roco_dataset import DirectoryROCODataset -from research.common.datasets.union_dataset import UnionDataset -from research.robots_at_robots.dataset.armor_dataset_factory import ArmorDatasetFactory - -ValueT = TypeVar("ValueT") - - -class ArmorValueDirectoryDataset(Generic[ValueT], ImageDirectoryDataset[ValueT], ABC): - def target_from_image_file(self, image_file: Path) -> ValueT: - return self._value_from_str(image_file.stem.split("-")[-1]) - - @abstractmethod - def _value_from_str(self, label: str) -> ValueT: - pass - - -@dataclass -class WrongVersionException(Exception): - actual: str - expected: str - - -class ArmorValueDatasetCache(Generic[ValueT], ABC): - VERSION: ClassVar[str] = "2.0" - - def __init__(self, task_name: str): - self.task_name = task_name - - def from_roco_datasets(self, roco_datasets: List[DirectoryROCODataset]) -> UnionDataset[Path, ValueT]: - return UnionDataset(map(self.from_roco_dataset, roco_datasets)) - - def from_roco_dataset(self, roco_dataset: DirectoryROCODataset) -> Dataset[Path, ValueT]: - self._generate_if_absent(roco_dataset) - return self.from_directory_and_name( - roco_dataset.main_dir / self.task_name, f"{roco_dataset.name}_armor_{self.task_name}" - ) - - @abstractmethod - def from_directory_and_name(self, directory: Path, name: str) -> Dataset[Path, ValueT]: - pass - - def _generate_if_absent(self, roco_dataset: DirectoryROCODataset): - try: - self._assert_exists_and_is_valid(roco_dataset) - except FileNotFoundError: - self._generate(roco_dataset, "lock not found") - except WrongVersionException as e: - self._generate(roco_dataset, f"upgrade [{e.actual} -> {e.expected}]") - - def _task_dir(self, roco_dataset: DirectoryROCODataset) -> Path: - return roco_dataset.main_dir / self.task_name - - def _generate(self, roco_dataset: DirectoryROCODataset, cause: str = ""): - rmtree(self._task_dir(roco_dataset), ignore_errors=True) - armor_dataset = self._make_dataset(roco_dataset) - if cause: - cause = f"(cause: {cause})" - for image, target, name in smart_tqdm( - armor_dataset, desc=f"Generating dataset {roco_dataset.name}_{self.task_name} {cause}", unit="frame" - ): - save_image(image, self._task_dir(roco_dataset) / f"{name}-{target}.jpg") - self._lock_file(roco_dataset).write_text(json.dumps({"version": self.VERSION, "date": create_time_id()})) - - def _assert_exists_and_is_valid(self, roco_dataset: DirectoryROCODataset): - lock = self._lock_file(roco_dataset) - if not lock.exists(): - raise FileNotFoundError() - - version = json.loads(lock.read_text())["version"] - if version != self.VERSION: - raise WrongVersionException(version, self.VERSION) - - def _make_dataset(self, roco_dataset) -> Dataset[Image, ValueT]: - return ArmorDatasetFactory(roco_dataset).make().transform_targets(self._value_from_armor) - - def _lock_file(self, roco_dataset: DirectoryROCODataset) -> Path: - return self._task_dir(roco_dataset) / ".lock" - - @abstractmethod - def _value_from_armor(self, armor: Armor) -> ValueT: - pass diff --git a/robots-at-robots/research/robots_at_robots/dataset/armor_value_dataset_cache.py b/robots-at-robots/research/robots_at_robots/dataset/armor_value_dataset_cache.py new file mode 100644 index 0000000000000000000000000000000000000000..a7b7c788e609badff69b93572e86eaad39de029c --- /dev/null +++ b/robots-at-robots/research/robots_at_robots/dataset/armor_value_dataset_cache.py @@ -0,0 +1,60 @@ +import json +from pathlib import Path +from shutil import rmtree +from typing import ClassVar, Generic, Optional + +from polystar.common.models.image import Image, save_image +from polystar.common.utils.misc import identity +from polystar.common.utils.time import create_time_id +from polystar.common.utils.tqdm import smart_tqdm +from research.common.datasets_v3.lazy_dataset import LazyDataset, TargetT +from research.common.datasets_v3.roco.roco_dataset_builder import ROCODatasetBuilder +from research.common.datasets_v3.transform_dataset import TransformDataset +from research.robots_at_robots.dataset.armor_dataset_factory import ArmorDataset +from research.robots_at_robots.dataset.armor_value_target_factory import ArmorValueTargetFactory + + +class ArmorValueDatasetCache(Generic[TargetT]): + VERSION: ClassVar[str] = "2.0" + + def __init__( + self, + roco_dataset_builder: ROCODatasetBuilder, + cache_dir: Path, + dataset_name: str, + target_factory: ArmorValueTargetFactory[TargetT], + ): + self.target_factory = target_factory + self.dataset_name = dataset_name + self.cache_dir = cache_dir + self.roco_dataset_builder = roco_dataset_builder + self.lock_file = cache_dir / ".lock" + + def generate_if_needed(self): + cause = self._get_generation_cause() + if cause is None: + return + self._clean_cache_dir() + self.save(self._generate(), cause) + + def _clean_cache_dir(self): + rmtree(self.cache_dir, ignore_errors=True) + self.cache_dir.mkdir() + + def save(self, dataset: LazyDataset[Image, TargetT], cause: str): + desc = f"Generating dataset {self.dataset_name} (cause: {cause})" + for img, target, name in smart_tqdm(dataset, desc=desc, unit="img"): + save_image(img, self.cache_dir / f"{name}-{target}.jpg") + self.lock_file.write_text(json.dumps({"version": self.VERSION, "date": create_time_id()})) + + def _generate(self) -> LazyDataset[Image, TargetT]: + return TransformDataset( + ArmorDataset(self.roco_dataset_builder.to_images().build_lazy()), identity, self.target_factory.from_armor + ) + + def _get_generation_cause(self) -> Optional[str]: + if not self.lock_file.exists(): + return "lock not found" + version = json.loads(self.lock_file.read_text())["version"] + if version != self.VERSION: + return f"upgrade [{version} -> {self.VERSION}]" diff --git a/robots-at-robots/research/robots_at_robots/dataset/armor_value_dataset_generator.py b/robots-at-robots/research/robots_at_robots/dataset/armor_value_dataset_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..f95138ff4ef42b0d748c438d7f21cc4e409d4707 --- /dev/null +++ b/robots-at-robots/research/robots_at_robots/dataset/armor_value_dataset_generator.py @@ -0,0 +1,40 @@ +from pathlib import Path +from typing import Generic, List + +from polystar.common.filters.filter_abc import FilterABC +from polystar.common.filters.pass_through_filter import PassThroughFilter +from research.common.datasets_v3.dataset import Dataset +from research.common.datasets_v3.image_file_dataset_builder import DirectoryDatasetBuilder +from research.common.datasets_v3.lazy_dataset import TargetT +from research.common.datasets_v3.roco.roco_dataset_builder import ROCODatasetBuilder +from research.common.datasets_v3.union_dataset import UnionDataset +from research.robots_at_robots.dataset.armor_value_dataset_cache import ArmorValueDatasetCache +from research.robots_at_robots.dataset.armor_value_target_factory import ArmorValueTargetFactory + + +class ArmorValueDatasetGenerator(Generic[TargetT]): + def __init__( + self, + task_name: str, + target_factory: ArmorValueTargetFactory[TargetT], + targets_filter: FilterABC[TargetT] = None, + ): + self.target_factory = target_factory + self.task_name = task_name + self.targets_filter = targets_filter or PassThroughFilter() + + def from_roco_datasets(self, roco_datasets: List[ROCODatasetBuilder]) -> UnionDataset[Path, TargetT]: + return UnionDataset(map(self.from_roco_dataset, roco_datasets)) + + def from_roco_dataset(self, roco_dataset_builder: ROCODatasetBuilder) -> Dataset[Path, TargetT]: + cache_dir = roco_dataset_builder.main_dir / self.task_name + dataset_name = f"{roco_dataset_builder.name}_armor_{self.task_name}" + + cache = ArmorValueDatasetCache(roco_dataset_builder, cache_dir, dataset_name, self.target_factory) + cache.generate_if_needed() + + return ( + DirectoryDatasetBuilder(cache_dir, self.target_factory.from_file, dataset_name) + .filter_targets(self.targets_filter) + .build() + ) diff --git a/robots-at-robots/research/robots_at_robots/dataset/armor_value_target_factory.py b/robots-at-robots/research/robots_at_robots/dataset/armor_value_target_factory.py new file mode 100644 index 0000000000000000000000000000000000000000..2d5da793d7487a54481c10678ad64004a4865f28 --- /dev/null +++ b/robots-at-robots/research/robots_at_robots/dataset/armor_value_target_factory.py @@ -0,0 +1,19 @@ +from abc import abstractmethod +from pathlib import Path +from typing import Generic + +from polystar.common.models.object import Armor +from research.common.datasets_v3.lazy_dataset import TargetT + + +class ArmorValueTargetFactory(Generic[TargetT]): + def from_file(self, file: Path) -> TargetT: + return self.from_str(file.stem.split("-")[-1]) + + @abstractmethod + def from_str(self, label: str) -> TargetT: + pass + + @abstractmethod + def from_armor(self, armor: Armor) -> TargetT: + pass diff --git a/robots-at-robots/research/robots_at_robots/demos/demo_infer.py b/robots-at-robots/research/robots_at_robots/demos/demo_infer.py index a5f11bd01db3f4de0b98d2e4aa9e5208d786cdd2..56c9dad2d6959ca0265048561be803b819e20a49 100644 --- a/robots-at-robots/research/robots_at_robots/demos/demo_infer.py +++ b/robots-at-robots/research/robots_at_robots/demos/demo_infer.py @@ -1,28 +1,24 @@ from polystar.common.models.label_map import LabelMap -from polystar.common.target_pipeline.objects_detectors.tf_model_objects_detector import \ - TFModelObjectsDetector -from polystar.common.target_pipeline.objects_validators.confidence_object_validator import \ - ConfidenceObjectValidator +from polystar.common.target_pipeline.detected_objects.detected_objects_factory import DetectedObjectFactory +from polystar.common.target_pipeline.objects_detectors.tf_model_objects_detector import TFModelObjectsDetector +from polystar.common.target_pipeline.objects_validators.confidence_object_validator import ConfidenceObjectValidator from polystar.common.utils.tensorflow import patch_tf_v2 from polystar.common.view.plt_results_viewer import PltResultViewer from polystar.robots_at_robots.dependency_injection import make_injector -from research.common.dataset.dji.dji_roco_datasets import DJIROCODataset +from research.common.datasets_v3.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo from research.robots_at_robots.demos.utils import load_tf_model if __name__ == "__main__": patch_tf_v2() injector = make_injector() - objects_detector = TFModelObjectsDetector(load_tf_model(), injector.get(LabelMap)) + objects_detector = TFModelObjectsDetector(DetectedObjectFactory(injector.get(LabelMap), []), load_tf_model()) filters = [ConfidenceObjectValidator(confidence_threshold=0.5)] with PltResultViewer("Demo of tf model") as viewer: - for i, image in enumerate(DJIROCODataset.CentralChina.images): + for image, _, _ in ROCODatasetsZoo.DJI.CENTRAL_CHINA.builder.to_images().cap(5): objects = objects_detector.detect(image) for f in filters: objects = f.filter(objects, image) viewer.display_image_with_objects(image, objects) - - if i == 5: - break diff --git a/robots-at-robots/research/robots_at_robots/demos/demo_pipeline.py b/robots-at-robots/research/robots_at_robots/demos/demo_pipeline.py index 8ab9b2b4a4b8c6e73165f6a4f18e6f3b4ed5d786..a049f39b39b71a936ddfb0268c1896c2bd36de9f 100644 --- a/robots-at-robots/research/robots_at_robots/demos/demo_pipeline.py +++ b/robots-at-robots/research/robots_at_robots/demos/demo_pipeline.py @@ -1,36 +1,24 @@ import cv2 + from polystar.common.communication.print_target_sender import PrintTargetSender -from polystar.common.image_pipeline.classifier_image_pipeline import \ - ClassifierImagePipeline -from polystar.common.image_pipeline.image_featurizer.mean_rgb_channels_featurizer import \ - MeanChannelsFeaturizer -from polystar.common.image_pipeline.models.red_blue_channels_comparison_model import \ - RedBlueComparisonModel +from polystar.common.image_pipeline.classifier_image_pipeline import ClassifierImagePipeline +from polystar.common.image_pipeline.image_featurizer.mean_rgb_channels_featurizer import MeanChannelsFeaturizer +from polystar.common.image_pipeline.models.red_blue_channels_comparison_model import RedBlueComparisonModel from polystar.common.models.camera import Camera from polystar.common.models.label_map import LabelMap -from polystar.common.target_pipeline.armors_descriptors.armors_color_descriptor import \ - ArmorsColorDescriptor +from polystar.common.target_pipeline.armors_descriptors.armors_color_descriptor import ArmorsColorDescriptor from polystar.common.target_pipeline.debug_pipeline import DebugTargetPipeline -from polystar.common.target_pipeline.detected_objects.detected_objects_factory import \ - DetectedObjectFactory -from polystar.common.target_pipeline.object_selectors.closest_object_selector import \ - ClosestObjectSelector -from polystar.common.target_pipeline.objects_detectors.tf_model_objects_detector import \ - TFModelObjectsDetector -from polystar.common.target_pipeline.objects_linker.simple_objects_linker import \ - SimpleObjectsLinker -from polystar.common.target_pipeline.objects_validators.confidence_object_validator import \ - ConfidenceObjectValidator -from polystar.common.target_pipeline.target_factories.ratio_simple_target_factory import \ - RatioSimpleTargetFactory -from polystar.common.target_pipeline.target_pipeline import \ - NoTargetFoundException +from polystar.common.target_pipeline.detected_objects.detected_objects_factory import DetectedObjectFactory +from polystar.common.target_pipeline.object_selectors.closest_object_selector import ClosestObjectSelector +from polystar.common.target_pipeline.objects_detectors.tf_model_objects_detector import TFModelObjectsDetector +from polystar.common.target_pipeline.objects_linker.simple_objects_linker import SimpleObjectsLinker +from polystar.common.target_pipeline.objects_validators.confidence_object_validator import ConfidenceObjectValidator +from polystar.common.target_pipeline.target_factories.ratio_simple_target_factory import RatioSimpleTargetFactory +from polystar.common.target_pipeline.target_pipeline import NoTargetFoundException from polystar.common.utils.tensorflow import patch_tf_v2 from polystar.common.view.plt_results_viewer import PltResultViewer from polystar.robots_at_robots.dependency_injection import make_injector -from research.common.dataset.dji.dji_roco_datasets import DJIROCODataset -from research.common.dataset.twitch.twitch_roco_datasets import \ - TwitchROCODataset +from research.common.datasets_v3.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo from research.robots_at_robots.demos.utils import load_tf_model if __name__ == "__main__": @@ -59,8 +47,8 @@ if __name__ == "__main__": ) with PltResultViewer("Demo of tf model") as viewer: - for dset in (TwitchROCODataset.TWITCH_470150052, DJIROCODataset.CentralChina): - for i, image_path in enumerate(dset.image_paths): + for dset in (ROCODatasetsZoo.TWITCH.T470150052, ROCODatasetsZoo.DJI.CENTRAL_CHINA): + for image_path, _, _ in dset.builder.cap(5): try: image = cv2.cvtColor(cv2.imread(str(image_path)), cv2.COLOR_BGR2RGB) target = pipeline.predict_target(image) @@ -68,6 +56,3 @@ if __name__ == "__main__": pass finally: viewer.display_debug_info(pipeline.debug_info_) - - if i == 5: - break diff --git a/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluation_reporter.py b/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluation_reporter.py index b65d0746a989d70a3cf5cc050f8be4998b0db940..226758b843358c09407f5739c5822e86bce40742 100644 --- a/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluation_reporter.py +++ b/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluation_reporter.py @@ -15,8 +15,8 @@ from polystar.common.utils.dataframe import Format, format_df_column, format_df_ from polystar.common.utils.markdown import MarkdownFile from polystar.common.utils.time import create_time_id from research.common.constants import DSET_DIR, EVALUATION_DIR -from research.common.datasets.roco.roco_dataset import ROCOFileDataset -from research.robots_at_robots.dataset.armor_value_dataset import ValueT +from research.common.datasets_v3.lazy_dataset import TargetT +from research.common.datasets_v3.roco.roco_dataset_builder import ROCODatasetBuilder from research.robots_at_robots.evaluation.image_pipeline_evaluator import ( ClassificationResults, ImagePipelineEvaluator, @@ -25,8 +25,8 @@ from research.robots_at_robots.evaluation.image_pipeline_evaluator import ( @dataclass -class ImagePipelineEvaluationReporter(Generic[ValueT]): - evaluator: ImagePipelineEvaluator[ValueT] +class ImagePipelineEvaluationReporter(Generic[TargetT]): + evaluator: ImagePipelineEvaluator[TargetT] evaluation_project: str main_metric: Tuple[str, str] = ("f1-score", "weighted avg") @@ -57,7 +57,7 @@ class ImagePipelineEvaluationReporter(Generic[ValueT]): @staticmethod def _report_dataset( - mf: MarkdownFile, roco_datasets: List[ROCOFileDataset], dataset_sizes: List[int], labels: List[Any] + mf: MarkdownFile, roco_datasets: List[ROCODatasetBuilder], dataset_sizes: List[int], labels: List[Any] ): total = len(labels) mf.paragraph(f"{total} images") @@ -72,7 +72,7 @@ class ImagePipelineEvaluationReporter(Generic[ValueT]): mf.table(df) def _report_aggregated_results( - self, mf: MarkdownFile, pipeline2results: Dict[str, ClassificationResults[ValueT]], report_dir: Path + self, mf: MarkdownFile, pipeline2results: Dict[str, ClassificationResults[TargetT]], report_dir: Path ): fig, (ax_test, ax_train) = plt.subplots(1, 2, figsize=(16, 5)) aggregated_test_results = self._aggregate_results(pipeline2results, ax_test, "test") @@ -88,11 +88,11 @@ class ImagePipelineEvaluationReporter(Generic[ValueT]): mf.paragraph("On train set:") mf.table(aggregated_train_results) - def _report_pipelines_results(self, mf: MarkdownFile, pipeline2results: Dict[str, ClassificationResults[ValueT]]): + def _report_pipelines_results(self, mf: MarkdownFile, pipeline2results: Dict[str, ClassificationResults[TargetT]]): for pipeline_name, results in pipeline2results.items(): self._report_pipeline_results(mf, pipeline_name, results) - def _report_pipeline_results(self, mf: MarkdownFile, pipeline_name: str, results: ClassificationResults[ValueT]): + def _report_pipeline_results(self, mf: MarkdownFile, pipeline_name: str, results: ClassificationResults[TargetT]): mf.title(pipeline_name, level=2) mf.paragraph(results.full_pipeline_name) @@ -109,7 +109,7 @@ class ImagePipelineEvaluationReporter(Generic[ValueT]): @staticmethod def _report_pipeline_set_results( - mf: MarkdownFile, results: SetClassificationResults[ValueT], image_paths: List[Path] + mf: MarkdownFile, results: SetClassificationResults[TargetT], image_paths: List[Path] ): mf.title("Metrics", level=4) mf.paragraph(f"Inference time: {results.mean_inference_time: .2e} s/img") @@ -137,7 +137,7 @@ class ImagePipelineEvaluationReporter(Generic[ValueT]): ) def _aggregate_results( - self, pipeline2results: Dict[str, ClassificationResults[ValueT]], ax: Axes, set_: str + self, pipeline2results: Dict[str, ClassificationResults[TargetT]], ax: Axes, set_: str ) -> DataFrame: main_metric_name = f"{self.main_metric[0]} {self.main_metric[1]}" df = ( diff --git a/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluator.py b/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluator.py index 30e3092511998a9c310b2682672ec7b8227b5cfb..aab735298fa8fe846919f32a57142cdebb5c52dd 100644 --- a/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluator.py +++ b/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluator.py @@ -6,15 +6,17 @@ from typing import Dict, Generic, Iterable, List, Sequence, Tuple import numpy as np from memoized_property import memoized_property +from sklearn.metrics import classification_report, confusion_matrix + from polystar.common.image_pipeline.image_pipeline import ImagePipeline from polystar.common.models.image import Image, load_images -from research.common.datasets.roco.directory_roco_dataset import DirectoryROCODataset -from research.robots_at_robots.dataset.armor_value_dataset import ArmorValueDatasetCache, ValueT -from sklearn.metrics import classification_report, confusion_matrix +from research.common.datasets_v3.lazy_dataset import TargetT +from research.common.datasets_v3.roco.roco_dataset_builder import ROCODatasetBuilder +from research.robots_at_robots.dataset.armor_value_dataset_generator import ArmorValueDatasetGenerator @dataclass -class SetClassificationResults(Generic[ValueT]): +class SetClassificationResults(Generic[TargetT]): labels: np.ndarray predictions: np.ndarray mean_inference_time: float @@ -32,37 +34,37 @@ class SetClassificationResults(Generic[ValueT]): return np.where(self.labels != self.predictions)[0] @memoized_property - def unique_labels(self) -> List[ValueT]: + def unique_labels(self) -> List[TargetT]: return sorted(set(self.labels) | set(self.predictions)) @dataclass -class ClassificationResults(Generic[ValueT]): - train_results: SetClassificationResults[ValueT] - test_results: SetClassificationResults[ValueT] +class ClassificationResults(Generic[TargetT]): + train_results: SetClassificationResults[TargetT] + test_results: SetClassificationResults[TargetT] full_pipeline_name: str - def on_set(self, set_: str) -> SetClassificationResults[ValueT]: + def on_set(self, set_: str) -> SetClassificationResults[TargetT]: if set_ is "train": return self.train_results return self.test_results -class ImagePipelineEvaluator(Generic[ValueT]): +class ImagePipelineEvaluator(Generic[TargetT]): def __init__( self, - train_roco_datasets: List[DirectoryROCODataset], - test_roco_datasets: List[DirectoryROCODataset], - image_dataset_cache: ArmorValueDatasetCache[ValueT], + train_roco_datasets: List[ROCODatasetBuilder], + test_roco_datasets: List[ROCODatasetBuilder], + image_dataset_generator: ArmorValueDatasetGenerator[TargetT], ): logging.info("Loading data") self.train_roco_datasets = train_roco_datasets self.test_roco_datasets = test_roco_datasets (self.train_images_paths, self.train_images, self.train_labels, self.train_dataset_sizes) = load_datasets( - train_roco_datasets, image_dataset_cache + train_roco_datasets, image_dataset_generator ) (self.test_images_paths, self.test_images, self.test_labels, self.test_dataset_sizes) = load_datasets( - test_roco_datasets, image_dataset_cache + test_roco_datasets, image_dataset_generator ) def evaluate_pipelines(self, pipelines: Iterable[ImagePipeline]) -> Dict[str, ClassificationResults]: @@ -82,7 +84,7 @@ class ImagePipelineEvaluator(Generic[ValueT]): @staticmethod def _evaluate_pipeline_on_set( - pipeline: ImagePipeline, images: List[Image], labels: List[ValueT] + pipeline: ImagePipeline, images: List[Image], labels: List[TargetT] ) -> SetClassificationResults: t = time() preds = pipeline.predict(images) @@ -91,9 +93,9 @@ class ImagePipelineEvaluator(Generic[ValueT]): def load_datasets( - roco_datasets: List[DirectoryROCODataset], image_dataset_cache: ArmorValueDatasetCache, -) -> Tuple[List[Path], List[Image], List[ValueT], List[int]]: - dataset = image_dataset_cache.from_roco_datasets(roco_datasets) + roco_datasets: List[ROCODatasetBuilder], image_dataset_generator: ArmorValueDatasetGenerator[TargetT], +) -> Tuple[List[Path], List[Image], List[TargetT], List[int]]: + dataset = image_dataset_generator.from_roco_datasets(roco_datasets) dataset_sizes = [len(d) for d in dataset.datasets] paths, targets = list(dataset.examples), list(dataset.targets)