From 132a973c253b413733cd87e9ffe5451d4c44a743 Mon Sep 17 00:00:00 2001 From: Mathieu Beligon <mathieu@feedly.com> Date: Thu, 10 Sep 2020 22:59:59 +0200 Subject: [PATCH] [common] (datasets) New, more generic dataset, with the ROCO example --- common/research/common/datasets/__init__.py | 0 .../research/common/datasets/image_dataset.py | 45 +++++++++++++++++++ .../research/common/datasets/roco/__init__.py | 0 .../datasets/roco/directory_roco_dataset.py | 34 ++++++++++++++ .../common/datasets/roco/roco_annotation.py | 31 +++++++++++++ .../common/datasets/roco/roco_dataset.py | 4 ++ .../common/datasets/roco/roco_datasets.py | 15 +++++++ .../common/datasets/roco/zoo/__init__.py | 0 .../research/common/datasets/roco/zoo/dji.py | 15 +++++++ .../common/datasets/roco/zoo/dji_zoomed.py | 16 +++++++ .../datasets/roco/zoo/roco_datasets_zoo.py | 9 ++++ .../common/datasets/roco/zoo/twitch.py | 20 +++++++++ 12 files changed, 189 insertions(+) create mode 100644 common/research/common/datasets/__init__.py create mode 100644 common/research/common/datasets/image_dataset.py create mode 100644 common/research/common/datasets/roco/__init__.py create mode 100644 common/research/common/datasets/roco/directory_roco_dataset.py create mode 100644 common/research/common/datasets/roco/roco_annotation.py create mode 100644 common/research/common/datasets/roco/roco_dataset.py create mode 100644 common/research/common/datasets/roco/roco_datasets.py create mode 100644 common/research/common/datasets/roco/zoo/__init__.py create mode 100644 common/research/common/datasets/roco/zoo/dji.py create mode 100644 common/research/common/datasets/roco/zoo/dji_zoomed.py create mode 100644 common/research/common/datasets/roco/zoo/roco_datasets_zoo.py create mode 100644 common/research/common/datasets/roco/zoo/twitch.py diff --git a/common/research/common/datasets/__init__.py b/common/research/common/datasets/__init__.py new file mode 100644 index 0000000..e69de29 diff --git 
TargetT = TypeVar("TargetT")


class ImageDataset(Generic[TargetT]):
    """Images paired with exactly one target each.

    A dataset is populated in one of two ways:

    * in memory, by passing ``images`` and ``targets`` lists of equal length to the constructor;
    * lazily, by subclassing and overriding ``__iter__`` to yield ``(image, target)`` pairs. The
      lists are then materialised from that iterator on the first access to ``images`` or
      ``targets``.

    ``Image`` is only referenced in annotations, hence the forward references.
    """

    # Defaults for subclasses whose ``__init__`` does not call ``ImageDataset.__init__``:
    # without them the first access to ``images`` / ``targets`` raises AttributeError.
    _images = None
    _targets = None

    def __init__(self, images: List["Image"] = None, targets: List[TargetT] = None):
        """Store the in-memory data, if any, and validate it.

        :param images: images of the dataset, or None when they are produced by ``__iter__``.
        :param targets: one target per image, or None when they are produced by ``__iter__``.
        :raises AssertionError: if no data is given and ``__iter__`` is not overridden, or if
            ``images`` and ``targets`` have different lengths.
        """
        self._targets = targets
        self._images = images
        self._check_consistency()

    def __iter__(self) -> Iterator[Tuple["Image", TargetT]]:
        """Iterate over ``(image, target)`` pairs of the in-memory data."""
        return zip(self.images, self.targets)

    @property
    def images(self) -> List["Image"]:
        """All images, loading them from ``__iter__`` first if needed."""
        self._load_data()
        return self._images

    @property
    def targets(self) -> List[TargetT]:
        """All targets, loading them from ``__iter__`` first if needed."""
        self._load_data()
        return self._targets

    def _load_data(self):
        """Materialise ``_images`` and ``_targets`` from the custom iterator, once."""
        if self._is_loaded:
            return
        # Collect the pairs first: ``images, targets = zip(*self)`` fails with ValueError
        # when the iterator yields nothing, whereas an empty dataset is a valid dataset.
        pairs = list(self)
        self._images = [image for image, _ in pairs]
        self._targets = [target for _, target in pairs]
        self._check_consistency()

    def _check_consistency(self):
        """Check that data is available and that images and targets line up."""
        assert (
            self._is_loaded or self._has_custom_load
        ), "Provide both images and targets, or override __iter__ in a subclass"
        if self._is_loaded:
            assert len(self._targets) == len(self._images), "images and targets must have the same length"

    @property
    def _is_loaded(self) -> bool:
        """True once both the images and the targets are held in memory."""
        return self._images is not None and self._targets is not None

    @property
    def _has_custom_load(self) -> bool:
        """True when a subclass overrides ``__iter__``.

        Compares the resolved method itself rather than a ``__qualname__`` prefix, so that a
        subclass whose name happens to start with ``ImageDataset`` is still detected.
        """
        return type(self).__iter__ is not ImageDataset.__iter__
# --- common/research/common/datasets/roco/directory_roco_dataset.py ---


class DirectoryROCODataset(ROCODataset):
    """ROCO dataset read from a directory on disk.

    Annotations are the ``*.xml`` files of ``<dataset_path>/image_annotation``. Each one is paired
    with the ``.jpg`` file of the same stem in ``<dataset_path>/image``.
    """

    def __init__(self, dataset_path: Path, dataset_name: str):
        """
        :param dataset_path: directory holding the ``image`` and ``image_annotation`` folders.
        :param dataset_name: name given to this dataset.
        """
        # Initialise the parent's ``_images`` / ``_targets`` state. Without this call, the
        # inherited ``images`` and ``targets`` properties fail with AttributeError.
        super().__init__()
        self.dataset_name = dataset_name
        self.dataset_path = dataset_path

    @property
    def images_dir_path(self) -> Path:
        """Directory holding the image files."""
        return self.dataset_path / "image"

    @property
    def annotation_paths(self) -> Iterable[Path]:
        """Annotation files of the dataset, sorted by path."""
        return sorted(self.annotations_dir_path.glob("*.xml"))

    @property
    def annotations_dir_path(self) -> Path:
        """Directory holding the xml annotation files."""
        return self.dataset_path / "image_annotation"

    def __iter__(self) -> Iterable[Tuple[Image, ROCOAnnotation]]:
        """Yield one ``(image, annotation)`` pair per annotation file, loading them on the fly."""
        for annotation_file in self.annotation_paths:
            yield self._load_from_annotation_file(annotation_file)

    def _load_from_annotation_file(self, annotation_file: Path) -> Tuple[Image, ROCOAnnotation]:
        """Load an annotation file together with the image sharing its stem."""
        return (
            Image.from_path(self.images_dir_path / f"{annotation_file.stem}.jpg"),
            ROCOAnnotation.from_xml_file(annotation_file),
        )


# --- common/research/common/datasets/roco/roco_annotation.py ---


@dataclass
class ROCOAnnotation:
    """Annotation of one ROCO image."""

    # Objects built from the annotation entries whose name does not start with "rune".
    objects: List[Object]

    # True when at least one entry whose name starts with "rune" was left out of ``objects``.
    has_rune: bool

    @staticmethod
    def from_xml_file(xml_file: Path) -> "ROCOAnnotation":
        """Parse an xml annotation file.

        :param xml_file: path of the file; its root element must be ``annotation``.
        :raises Exception: whatever reading or parsing raised, after logging it with the file path.
        """
        try:
            return ROCOAnnotation.from_xml_dict(xmltodict.parse(xml_file.read_text())["annotation"])
        except Exception:
            logging.exception("Error parsing annotation file %s", xml_file)
            # Bare ``raise`` re-raises the active exception with its traceback untouched.
            raise

    @staticmethod
    def from_xml_dict(xml_dict: Dict) -> "ROCOAnnotation":
        """Build the annotation from the content of the ``annotation`` element.

        :param xml_dict: parsed content; its optional ``object`` entry is either missing, None,
            a single object or a list of objects.
        """
        json_objects = xml_dict.get("object", []) or []
        # A single object is not wrapped in a list by the parser: normalise to a list.
        if not isinstance(json_objects, list):
            json_objects = [json_objects]
        roco_json_objects = [obj_json for obj_json in json_objects if not obj_json["name"].startswith("rune")]
        objects = [ObjectFactory.from_json(obj_json) for obj_json in roco_json_objects]

        return ROCOAnnotation(objects=objects, has_rune=len(roco_json_objects) != len(json_objects))
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    # Used in annotations only. The module is defined at
    # research/common/datasets/roco/directory_roco_dataset.py, which is also the path the
    # sibling modules import it from; ``research.common.dataset.directory_roco_dataset``
    # is not.
    from research.common.datasets.roco.directory_roco_dataset import DirectoryROCODataset


class ROCODatasets:
    """Base class for declarative groups of datasets.

    When a subclass is created, each of its own public, non-callable class attributes is
    replaced by ``cls._make_dataset(attribute_name, *value)``. A value that is not a tuple is
    passed as a single argument, and an empty tuple means "no extra argument".
    """

    @staticmethod
    def _make_dataset(dataset_name: str, *args: Any) -> "DirectoryROCODataset":
        """Build the dataset declared by one class attribute.

        :param dataset_name: name of the class attribute being replaced.
        :param args: value of the attribute, unpacked if it is a tuple.
        :raises NotImplementedError: always; subclasses declaring datasets must override this.
        """
        raise NotImplementedError("Subclasses of ROCODatasets must implement _make_dataset")

    def __init_subclass__(cls, **kwargs: Any) -> None:
        """Replace the dataset declarations of ``cls`` with the datasets they describe."""
        super().__init_subclass__(**kwargs)
        # Iterate over a snapshot: the loop rebinds attributes of the namespace it reads.
        for dataset_name, args in list(vars(cls).items()):
            if dataset_name.startswith("_") or callable(args):
                continue
            # Before Python 3.10 staticmethod/classmethod objects are not callable, and
            # properties never are; none of them is a dataset declaration.
            if isinstance(args, (staticmethod, classmethod, property)):
                continue
            if not isinstance(args, tuple):
                args = (args,)
            setattr(cls, dataset_name, cls._make_dataset(dataset_name, *args))
# --- common/research/common/datasets/roco/zoo/dji.py ---


class DJIROCODatasets(ROCODatasets):
    """Datasets stored under ``DJI_ROCO_DSET_DIR``, one per competition.

    Each attribute is declared with the name of its competition directory.
    """

    CentralChina = "robomaster_Central China Regional Competition"
    NorthChina = "robomaster_North China Regional Competition"
    SouthChina = "robomaster_South China Regional Competition"
    Final = "robomaster_Final Tournament"

    @staticmethod
    def _make_dataset(dataset_name: str, competition_name: str) -> DirectoryROCODataset:
        """Build the dataset located in ``DJI_ROCO_DSET_DIR / competition_name``."""
        competition_dir = DJI_ROCO_DSET_DIR / competition_name
        return DirectoryROCODataset(competition_dir, dataset_name)


# --- common/research/common/datasets/roco/zoo/dji_zoomed.py ---


class DJIROCOZoomedDatasets(ROCODatasets):
    """Datasets stored under ``DJI_ROCO_ZOOMED_DSET_DIR``.

    The declarations carry no argument: the directory is derived from the attribute name.
    """

    CentralChina = ()
    NorthChina = ()
    SouthChina = ()
    Final = ()

    @staticmethod
    def _make_dataset(dataset_name: str) -> DirectoryROCODataset:
        """Build the dataset located in ``DJI_ROCO_ZOOMED_DSET_DIR / camel2snake(dataset_name)``."""
        zoomed_dir = DJI_ROCO_ZOOMED_DSET_DIR / camel2snake(dataset_name)
        zoomed_name = f"{dataset_name}ZoomedV2"
        return DirectoryROCODataset(zoomed_dir, zoomed_name)
# --- common/research/common/datasets/roco/zoo/roco_datasets_zoo.py ---


class ROCODatasetsZoo:
    """Groups the dataset families defined in this package under a single class."""

    DJI_ZOOMED = DJIROCOZoomedDatasets()
    DJI = DJIROCODatasets()
    TWITCH = TwitchROCODatasets()


# --- common/research/common/datasets/roco/zoo/twitch.py ---


class TwitchROCODatasets(ROCODatasets):
    """Datasets stored under ``TWITCH_DSET_DIR / "v1"``, one directory per id.

    Attributes are named ``TWITCH_<id>``; the declarations carry no argument, as the id is
    read from the attribute name.
    """

    TWITCH_470149568 = ()
    TWITCH_470150052 = ()
    TWITCH_470151286 = ()
    TWITCH_470152289 = ()
    TWITCH_470152730 = ()
    TWITCH_470152838 = ()
    TWITCH_470153081 = ()
    TWITCH_470158483 = ()

    @staticmethod
    def _make_dataset(dataset_name: str) -> DirectoryROCODataset:
        """Build the dataset whose id follows the ``TWITCH_`` prefix of ``dataset_name``."""
        prefix_length = len("TWITCH_")
        twitch_id = dataset_name[prefix_length:]
        dataset_dir = TWITCH_DSET_DIR / "v1" / twitch_id
        return DirectoryROCODataset(dataset_dir, f"T{twitch_id}")