From 132a973c253b413733cd87e9ffe5451d4c44a743 Mon Sep 17 00:00:00 2001
From: Mathieu Beligon <mathieu@feedly.com>
Date: Thu, 10 Sep 2020 22:59:59 +0200
Subject: [PATCH] [common] (datasets) New, more generic dataset, with the ROCO
 example

---
 common/research/common/datasets/__init__.py   |  0
 .../research/common/datasets/image_dataset.py | 45 +++++++++++++++++++
 .../research/common/datasets/roco/__init__.py |  0
 .../datasets/roco/directory_roco_dataset.py   | 34 ++++++++++++++
 .../common/datasets/roco/roco_annotation.py   | 31 +++++++++++++
 .../common/datasets/roco/roco_dataset.py      |  4 ++
 .../common/datasets/roco/roco_datasets.py     | 15 +++++++
 .../common/datasets/roco/zoo/__init__.py      |  0
 .../research/common/datasets/roco/zoo/dji.py  | 15 +++++++
 .../common/datasets/roco/zoo/dji_zoomed.py    | 16 +++++++
 .../datasets/roco/zoo/roco_datasets_zoo.py    |  9 ++++
 .../common/datasets/roco/zoo/twitch.py        | 20 +++++++++
 12 files changed, 189 insertions(+)
 create mode 100644 common/research/common/datasets/__init__.py
 create mode 100644 common/research/common/datasets/image_dataset.py
 create mode 100644 common/research/common/datasets/roco/__init__.py
 create mode 100644 common/research/common/datasets/roco/directory_roco_dataset.py
 create mode 100644 common/research/common/datasets/roco/roco_annotation.py
 create mode 100644 common/research/common/datasets/roco/roco_dataset.py
 create mode 100644 common/research/common/datasets/roco/roco_datasets.py
 create mode 100644 common/research/common/datasets/roco/zoo/__init__.py
 create mode 100644 common/research/common/datasets/roco/zoo/dji.py
 create mode 100644 common/research/common/datasets/roco/zoo/dji_zoomed.py
 create mode 100644 common/research/common/datasets/roco/zoo/roco_datasets_zoo.py
 create mode 100644 common/research/common/datasets/roco/zoo/twitch.py

diff --git a/common/research/common/datasets/__init__.py b/common/research/common/datasets/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/common/research/common/datasets/image_dataset.py b/common/research/common/datasets/image_dataset.py
new file mode 100644
index 0000000..6dca7c0
--- /dev/null
+++ b/common/research/common/datasets/image_dataset.py
@@ -0,0 +1,70 @@
+from typing import Generic, Iterator, List, Tuple, TypeVar
+
+from polystar.common.models.image import Image
+
+TargetT = TypeVar("TargetT")
+
+
+class ImageDataset(Generic[TargetT]):
+    """A list of images, each paired with one target.
+
+    The data is either given to the constructor, or produced lazily by a subclass overriding
+    `__iter__` to yield `(image, target)` pairs. In the lazy case, the pairs are materialised
+    the first time `images` or `targets` is accessed.
+    """
+
+    def __init__(self, images: List[Image] = None, targets: List[TargetT] = None):
+        """Store the data, if any, and check the dataset is usable.
+
+        :param images: the images, or None when a subclass provides them through `__iter__`
+        :param targets: one target per image, or None when provided through `__iter__`
+        :raises AssertionError: if data is missing and `__iter__` is not overridden, or if the
+            numbers of images and targets differ
+        """
+        self._targets = targets
+        self._images = images
+        self._check_consistency()
+
+    def __iter__(self) -> Iterator[Tuple[Image, TargetT]]:
+        """Iterate over the `(image, target)` pairs; lazy subclasses override this."""
+        return zip(self.images, self.targets)
+
+    @property
+    def images(self) -> List[Image]:
+        """The images, loaded on first access if needed."""
+        self._load_data()
+        return self._images
+
+    @property
+    def targets(self) -> List[TargetT]:
+        """The targets, loaded on first access if needed."""
+        self._load_data()
+        return self._targets
+
+    def _load_data(self):
+        """Materialise the pairs yielded by `__iter__`, unless the data is already there."""
+        if self._is_loaded:
+            return
+        # Accumulate pair by pair rather than `zip(*self)`, which cannot be unpacked when the
+        # dataset is empty. Attributes are only set once the iteration succeeded.
+        images, targets = [], []
+        for image, target in self:
+            images.append(image)
+            targets.append(target)
+        self._images, self._targets = images, targets
+
+    def _check_consistency(self):
+        """Assert the dataset has data, or a way to load it, and that the data is aligned."""
+        assert self._is_loaded or self._has_custom_load, "No data given, and __iter__ is not overridden"
+        if self._is_loaded:
+            assert len(self._targets) == len(self._images), "images and targets differ in length"
+
+    @property
+    def _is_loaded(self) -> bool:
+        return self._images is not None and self._targets is not None
+
+    @property
+    def _has_custom_load(self) -> bool:
+        # Compare the methods themselves: a name-based check would wrongly reject a subclass
+        # whose name merely starts with "ImageDataset".
+        return type(self).__iter__ is not ImageDataset.__iter__
diff --git a/common/research/common/datasets/roco/__init__.py b/common/research/common/datasets/roco/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/common/research/common/datasets/roco/directory_roco_dataset.py b/common/research/common/datasets/roco/directory_roco_dataset.py
new file mode 100644
index 0000000..b11b707
--- /dev/null
+++ b/common/research/common/datasets/roco/directory_roco_dataset.py
@@ -0,0 +1,50 @@
+from pathlib import Path
+from typing import Iterable, Iterator, Tuple
+
+from polystar.common.models.image import Image
+from research.common.datasets.roco.roco_annotation import ROCOAnnotation
+from research.common.datasets.roco.roco_dataset import ROCODataset
+
+
+class DirectoryROCODataset(ROCODataset):
+    """ROCO dataset read lazily from a directory.
+
+    Annotations are the `*.xml` files of `<dataset_path>/image_annotation`; the image of an
+    annotation is `<dataset_path>/image/<annotation file stem>.jpg`.
+    """
+
+    def __init__(self, dataset_path: Path, dataset_name: str):
+        """
+        :param dataset_path: directory containing the `image` and `image_annotation` folders
+        :param dataset_name: name kept on the dataset as `dataset_name`
+        """
+        # Set up the lazy-loading state of the parent class: without it, `images` and `targets`
+        # fail with an AttributeError on `_images`.
+        super().__init__()
+        self.dataset_name = dataset_name
+        self.dataset_path = dataset_path
+
+    @property
+    def images_dir_path(self) -> Path:
+        return self.dataset_path / "image"
+
+    @property
+    def annotation_paths(self) -> Iterable[Path]:
+        """The annotation files, sorted by path so the iteration order is deterministic."""
+        return sorted(self.annotations_dir_path.glob("*.xml"))
+
+    @property
+    def annotations_dir_path(self) -> Path:
+        return self.dataset_path / "image_annotation"
+
+    def __iter__(self) -> Iterator[Tuple[Image, ROCOAnnotation]]:
+        """Yield one `(image, annotation)` pair per annotation file, loading them on the fly."""
+        for annotation_file in self.annotation_paths:
+            yield self._load_from_annotation_file(annotation_file)
+
+    def _load_from_annotation_file(self, annotation_file: Path) -> Tuple[Image, ROCOAnnotation]:
+        """Load an annotation and the `.jpg` image sharing its file stem."""
+        return (
+            Image.from_path(self.images_dir_path / f"{annotation_file.stem}.jpg"),
+            ROCOAnnotation.from_xml_file(annotation_file),
+        )
diff --git a/common/research/common/datasets/roco/roco_annotation.py b/common/research/common/datasets/roco/roco_annotation.py
new file mode 100644
index 0000000..60e069b
--- /dev/null
+++ b/common/research/common/datasets/roco/roco_annotation.py
@@ -0,0 +1,46 @@
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Dict, List
+
+import xmltodict
+from polystar.common.models.object import Object, ObjectFactory
+
+
+@dataclass
+class ROCOAnnotation:
+    """Annotation of an image: its objects, and whether a rune was annotated on it."""
+
+    # Annotated objects, "rune*" entries excluded
+    objects: List[Object]
+
+    # True if at least one annotated entry had a name starting with "rune"
+    has_rune: bool
+
+    @classmethod
+    def from_xml_file(cls, xml_file: Path) -> "ROCOAnnotation":
+        """Parse the annotation stored in `xml_file`.
+
+        Any error is logged with the name of the offending file, then re-raised unchanged.
+        """
+        try:
+            return cls.from_xml_dict(xmltodict.parse(xml_file.read_text())["annotation"])
+        except Exception:
+            logging.exception("Error parsing annotation file %s", xml_file)
+            raise
+
+    @classmethod
+    def from_xml_dict(cls, xml_dict: Dict) -> "ROCOAnnotation":
+        """Build the annotation from the content of the `annotation` XML element.
+
+        :param xml_dict: the parsed element; its optional `object` entry may be missing, None,
+            a single dict or a list of dicts, each with a `name`
+        """
+        json_objects = xml_dict.get("object", []) or []
+        if not isinstance(json_objects, list):
+            json_objects = [json_objects]
+        roco_json_objects = [obj_json for obj_json in json_objects if not obj_json["name"].startswith("rune")]
+        objects = [ObjectFactory.from_json(obj_json) for obj_json in roco_json_objects]
+
+        # A rune is present when at least one entry was filtered out above
+        return cls(objects=objects, has_rune=len(roco_json_objects) != len(json_objects))
diff --git a/common/research/common/datasets/roco/roco_dataset.py b/common/research/common/datasets/roco/roco_dataset.py
new file mode 100644
index 0000000..57abf82
--- /dev/null
+++ b/common/research/common/datasets/roco/roco_dataset.py
@@ -0,0 +1,4 @@
+from research.common.datasets.image_dataset import ImageDataset
+from research.common.datasets.roco.roco_annotation import ROCOAnnotation
+
+ROCODataset = ImageDataset[ROCOAnnotation]  # ImageDataset whose targets are ROCOAnnotation objects
diff --git a/common/research/common/datasets/roco/roco_datasets.py b/common/research/common/datasets/roco/roco_datasets.py
new file mode 100644
index 0000000..c716d29
--- /dev/null
+++ b/common/research/common/datasets/roco/roco_datasets.py
@@ -0,0 +1,33 @@
+from typing import Any, Tuple
+
+from research.common.datasets.roco.directory_roco_dataset import \
+    DirectoryROCODataset
+
+
+class ROCODatasets:
+    """Base class of dataset collections declared through class attributes.
+
+    When a subclass is created, each public, non-callable attribute of its body is replaced by
+    `_make_dataset(attribute_name, *attribute_value)`; a value that is not a tuple is passed as
+    a single argument.
+    """
+
+    @staticmethod
+    def _make_dataset(dataset_name: str, *args: Any) -> DirectoryROCODataset:
+        """Build the dataset declared by the attribute `dataset_name`; to be overridden.
+
+        :param dataset_name: name of the class attribute declaring the dataset
+        :param args: value(s) assigned to that attribute
+        :raises NotImplementedError: always, in this base implementation
+        """
+        # Fail loudly rather than silently turning every declared dataset into None
+        raise NotImplementedError("Subclasses of ROCODatasets must implement _make_dataset")
+
+    def __init_subclass__(cls, **kwargs):
+        super().__init_subclass__(**kwargs)
+        # Iterate over a snapshot, as the attributes are reassigned inside the loop
+        for dataset_name, value in list(vars(cls).items()):
+            if callable(value) or dataset_name.startswith("_"):
+                continue
+            make_args: Tuple[Any, ...] = value if isinstance(value, tuple) else (value,)
+            setattr(cls, dataset_name, cls._make_dataset(dataset_name, *make_args))
diff --git a/common/research/common/datasets/roco/zoo/__init__.py b/common/research/common/datasets/roco/zoo/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/common/research/common/datasets/roco/zoo/dji.py b/common/research/common/datasets/roco/zoo/dji.py
new file mode 100644
index 0000000..4a630e7
--- /dev/null
+++ b/common/research/common/datasets/roco/zoo/dji.py
@@ -0,0 +1,22 @@
+from research.common.constants import DJI_ROCO_DSET_DIR
+from research.common.datasets.roco.directory_roco_dataset import \
+    DirectoryROCODataset
+from research.common.datasets.roco.roco_datasets import ROCODatasets
+
+
+class DJIROCODatasets(ROCODatasets):
+    """Datasets located under DJI_ROCO_DSET_DIR, one per competition.
+
+    Each attribute is declared with the name of its competition directory.
+    """
+
+    CentralChina = "robomaster_Central China Regional Competition"
+    NorthChina = "robomaster_North China Regional Competition"
+    SouthChina = "robomaster_South China Regional Competition"
+    Final = "robomaster_Final Tournament"
+
+    @staticmethod
+    def _make_dataset(dataset_name: str, competition_name: str) -> DirectoryROCODataset:
+        """Return the dataset named `dataset_name`, read from the `competition_name` directory."""
+        competition_dir = DJI_ROCO_DSET_DIR / competition_name
+        return DirectoryROCODataset(dataset_path=competition_dir, dataset_name=dataset_name)
diff --git a/common/research/common/datasets/roco/zoo/dji_zoomed.py b/common/research/common/datasets/roco/zoo/dji_zoomed.py
new file mode 100644
index 0000000..009ffac
--- /dev/null
+++ b/common/research/common/datasets/roco/zoo/dji_zoomed.py
@@ -0,0 +1,23 @@
+from polystar.common.utils.str_utils import camel2snake
+from research.common.constants import DJI_ROCO_ZOOMED_DSET_DIR
+from research.common.datasets.roco.directory_roco_dataset import \
+    DirectoryROCODataset
+from research.common.datasets.roco.roco_datasets import ROCODatasets
+
+
+class DJIROCOZoomedDatasets(ROCODatasets):
+    """Datasets located under DJI_ROCO_ZOOMED_DSET_DIR.
+
+    The directory of a dataset is derived from its attribute name, hence the empty declarations.
+    """
+
+    CentralChina = ()
+    NorthChina = ()
+    SouthChina = ()
+    Final = ()
+
+    @staticmethod
+    def _make_dataset(dataset_name: str) -> DirectoryROCODataset:
+        """Return the dataset `<dataset_name>ZoomedV2`, read from `camel2snake(dataset_name)`."""
+        zoomed_dir = DJI_ROCO_ZOOMED_DSET_DIR / camel2snake(dataset_name)
+        return DirectoryROCODataset(dataset_path=zoomed_dir, dataset_name=f"{dataset_name}ZoomedV2")
diff --git a/common/research/common/datasets/roco/zoo/roco_datasets_zoo.py b/common/research/common/datasets/roco/zoo/roco_datasets_zoo.py
new file mode 100644
index 0000000..a11d78b
--- /dev/null
+++ b/common/research/common/datasets/roco/zoo/roco_datasets_zoo.py
@@ -0,0 +1,9 @@
+from research.common.datasets.roco.zoo.dji import DJIROCODatasets
+from research.common.datasets.roco.zoo.dji_zoomed import DJIROCOZoomedDatasets
+from research.common.datasets.roco.zoo.twitch import TwitchROCODatasets
+
+
+class ROCODatasetsZoo:  # Access point to the ROCO dataset collections, each instantiated once at import
+    DJI_ZOOMED = DJIROCOZoomedDatasets()  # collection declared in zoo/dji_zoomed.py
+    DJI = DJIROCODatasets()  # collection declared in zoo/dji.py
+    TWITCH = TwitchROCODatasets()  # collection declared in zoo/twitch.py
diff --git a/common/research/common/datasets/roco/zoo/twitch.py b/common/research/common/datasets/roco/zoo/twitch.py
new file mode 100644
index 0000000..dfb4963
--- /dev/null
+++ b/common/research/common/datasets/roco/zoo/twitch.py
@@ -0,0 +1,28 @@
+from research.common.constants import TWITCH_DSET_DIR
+from research.common.datasets.roco.directory_roco_dataset import \
+    DirectoryROCODataset
+from research.common.datasets.roco.roco_datasets import ROCODatasets
+
+
+class TwitchROCODatasets(ROCODatasets):
+    """Datasets located under `TWITCH_DSET_DIR / "v1"`, one per id.
+
+    The id of a dataset is what follows `TWITCH_` in its attribute name.
+    """
+
+    TWITCH_470149568 = ()
+    TWITCH_470150052 = ()
+    TWITCH_470151286 = ()
+    TWITCH_470152289 = ()
+    TWITCH_470152730 = ()
+    TWITCH_470152838 = ()
+    TWITCH_470153081 = ()
+    TWITCH_470158483 = ()
+
+    @staticmethod
+    def _make_dataset(dataset_name: str) -> DirectoryROCODataset:
+        """Return the dataset `T<id>`, read from the `v1/<id>` directory."""
+        prefix = "TWITCH_"
+        twitch_id = dataset_name[len(prefix) :]
+        twitch_dir = TWITCH_DSET_DIR / "v1" / twitch_id
+        return DirectoryROCODataset(dataset_path=twitch_dir, dataset_name=f"T{twitch_id}")
-- 
GitLab