diff --git a/common/polystar/common/models/image.py b/common/polystar/common/models/image.py
index fcc4faafdfded5bf1558b237dab685132bff813f..2cc6c3404c0703f37e5d30e5a16fc619d0a46414 100644
--- a/common/polystar/common/models/image.py
+++ b/common/polystar/common/models/image.py
@@ -2,9 +2,9 @@ from pathlib import Path
 from typing import Iterable
 
 import cv2
-from nptyping import Array
+import numpy as np
 
-Image = Array[int, ..., ..., 3]
+Image = np.ndarray
 
 
 def load_image(image_path: Path, conversion: int = cv2.COLOR_BGR2RGB) -> Image:
diff --git a/common/polystar/common/models/object.py b/common/polystar/common/models/object.py
index e14c05569c89a5d422598ba19f492fbc52b72157..96b3f455904a41e330b490a40bfb1a4c5863fde6 100644
--- a/common/polystar/common/models/object.py
+++ b/common/polystar/common/models/object.py
@@ -16,6 +16,9 @@ class ArmorColor(NoCaseEnum):
     Red = auto()
     Unknown = auto()
 
+    def __str__(self):
+        return self.name.lower()
+
 
 ORDERED_ARMOR_COLORS = [ArmorColor.Blue, ArmorColor.Grey, ArmorColor.Red]
 
@@ -42,6 +45,9 @@ class Armor(Object):
     number: ArmorNumber
     color: ArmorColor
 
+    def __repr__(self):
+        return f"<{self} {self.color} {self.number}>"
+
 
 class ObjectFactory:
     @staticmethod
diff --git a/common/polystar/common/utils/iterable_utils.py b/common/polystar/common/utils/iterable_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..a0046880057618448b223fab0f64ab231619546a
--- /dev/null
+++ b/common/polystar/common/utils/iterable_utils.py
@@ -0,0 +1,12 @@
+from typing import Iterable
+
+from more_itertools import ilen
+
+
+def smart_len(it: Iterable) -> int:
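+    # len() raises TypeError (not AttributeError) on un-sized iterables such as
+    # generators; fall back to more_itertools.ilen, which counts by exhausting it.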
+    try:
+        return len(it)
+    except TypeError:
+        return ilen(it)
diff --git a/common/polystar/common/utils/tqdm.py b/common/polystar/common/utils/tqdm.py
new file mode 100644
index 0000000000000000000000000000000000000000..9bf329d3ff9254b87f19882727c4b3ec74dee3c4
--- /dev/null
+++ b/common/polystar/common/utils/tqdm.py
@@ -0,0 +1,13 @@
+from typing import Iterable, TypeVar
+
+from tqdm import tqdm
+
+T = TypeVar("T")
+
+
+def smart_tqdm(
+    it: Iterable[T], *args, desc: str = None, total: int = None, unit: str = "it", leave: bool = True, **kwargs
+) -> Iterable[T]:
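+    # Thin wrapper that pins the project-wide tqdm defaults (desc, total, unit,
+    # leave); any extra positional/keyword arguments pass straight through.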
+    return tqdm(it, *args, desc=desc, total=total, unit=unit, leave=leave, **kwargs)
diff --git a/common/research/common/dataset/improvement/zoom.py b/common/research/common/dataset/improvement/zoom.py
index 785ea347b07b7f8c4ce55db07c5c462b7664c0c1..9404b43d1e115abf71d1644ff189c33e6284ce1f 100644
--- a/common/research/common/dataset/improvement/zoom.py
+++ b/common/research/common/dataset/improvement/zoom.py
@@ -4,8 +4,7 @@ from typing import Iterable, List, Tuple
 
 from polystar.common.models.box import Box
 from polystar.common.models.image import Image
-from polystar.common.target_pipeline.objects_validators.in_box_validator import \
-    InBoxValidator
+from polystar.common.target_pipeline.objects_validators.in_box_validator import InBoxValidator
 from polystar.common.view.plt_results_viewer import PltResultViewer
 from research.common.datasets.roco.roco_annotation import ROCOAnnotation
 from research.common.datasets.roco.zoo.roco_datasets_zoo import ROCODatasetsZoo
@@ -13,7 +12,7 @@ from research.common.datasets.roco.zoo.roco_datasets_zoo import ROCODatasetsZoo
 
 def crop_image_annotation(
     image: Image, annotation: ROCOAnnotation, box: Box, min_coverage: float, name: str
-) -> Tuple[Image, ROCOAnnotation]:
+) -> Tuple[Image, ROCOAnnotation, str]:
     objects = InBoxValidator(box, min_coverage).filter(annotation.objects, image)
     objects = [copy(o) for o in objects]
     for obj in objects:
@@ -25,7 +24,8 @@ def crop_image_annotation(
     )
     return (
         image[box.y1 : box.y2, box.x1 : box.x2],
-        ROCOAnnotation(w=box.w, h=box.h, objects=objects, has_rune=False, name=name),
+        ROCOAnnotation(w=box.w, h=box.h, objects=objects, has_rune=False),
+        name,
     )
 
 
@@ -36,12 +36,12 @@ class Zoomer:
     max_overlap: float
     min_coverage: float
 
-    def zoom(self, image: Image, annotation: ROCOAnnotation) -> Iterable[Tuple[Image, ROCOAnnotation]]:
+    def zoom(self, image: Image, annotation: ROCOAnnotation, name: str) -> Iterable[Tuple[Image, ROCOAnnotation, str]]:
         boxes = [obj.box for obj in annotation.objects]
         boxes = self._create_views_covering(boxes, annotation)
         boxes = self._remove_overlapping_boxes(boxes)
         return (
-            crop_image_annotation(image, annotation, box, self.min_coverage, name=f"{annotation.name}_zoom_{i}")
+            crop_image_annotation(image, annotation, box, self.min_coverage, name=f"{name}_zoom_{i}")
             for (i, box) in enumerate(boxes, 1)
         )
diff --git a/common/research/common/dataset/tensorflow_record.py b/common/research/common/dataset/tensorflow_record.py
index 7c1e40cbe42badacee69be040f598965710f339d..9fb7067ab579777118c3adfd7b06a427ef3a834b 100644
--- a/common/research/common/dataset/tensorflow_record.py
+++ b/common/research/common/dataset/tensorflow_record.py
@@ -16,15 +16,20 @@ from tqdm import tqdm
 class TensorflowRecordFactory:
     @staticmethod
     def from_datasets(datasets: List[DirectoryROCODataset], prefix: str = ""):
-        name = prefix + "_".join(d.name for d in datasets)
-        writer = python_io.TFRecordWriter(str(TENSORFLOW_RECORDS_DIR / f"{name}.record"))
+        record_name = prefix + "_".join(d.name for d in datasets)
+        writer = python_io.TFRecordWriter(str(TENSORFLOW_RECORDS_DIR / f"{record_name}.record"))
         c = 0
-        for dataset in tqdm(datasets, desc=name, total=len(datasets), unit="dataset"):
-            for image_path, annotation in tqdm(dataset, desc=dataset.name, total=len(dataset), unit="img", leave=False):
+        for dataset in tqdm(datasets, desc=record_name, total=len(datasets), unit="dataset"):
+            for image_path, annotation, _ in tqdm(
+                dataset, desc=dataset.name, total=len(dataset), unit="img", leave=False
+            ):
                 writer.write(_example_from_image_annotation(image_path, annotation).SerializeToString())
                 c += 1
         writer.close()
-        move(str(TENSORFLOW_RECORDS_DIR / f"{name}.record"), str(TENSORFLOW_RECORDS_DIR / f"{name}_{c}_imgs.record"))
+        move(
+            str(TENSORFLOW_RECORDS_DIR / f"{record_name}.record"),
+            str(TENSORFLOW_RECORDS_DIR / f"{record_name}_{c}_imgs.record"),
+        )
 
     @staticmethod
     def from_dataset(dataset: DirectoryROCODataset, prefix: str = ""):
diff --git a/common/research/common/datasets/dataset.py b/common/research/common/datasets/dataset.py
index eeb9a6e466774e581a9a74b0b50ca1e0e111e314..74376878642bee2293baf12495e1e91c7e65875c 100644
--- a/common/research/common/datasets/dataset.py
+++ b/common/research/common/datasets/dataset.py
@@ -3,6 +3,7 @@ from collections import deque
 from typing import Callable, Generic, Iterable, Iterator, Tuple, TypeVar
 
 from more_itertools import ilen
+from polystar.common.utils.iterable_utils import smart_len
 from polystar.common.utils.misc import identity
 
 ExampleT = TypeVar("ExampleT")
@@ -11,7 +12,7 @@ ExampleU = TypeVar("ExampleU")
 TargetU = TypeVar("TargetU")
 
 
-class Dataset(Generic[ExampleT, TargetT], Iterable[Tuple[ExampleT, TargetT]], ABC):
+class Dataset(Generic[ExampleT, TargetT], Iterable[Tuple[ExampleT, TargetT, str]], ABC):
     def __init__(self, name: str):
         self.name = name
 
@@ -25,8 +26,13 @@ class Dataset(Generic[ExampleT, TargetT], Iterable[Tuple[ExampleT, TargetT]], AB
     def targets(self) -> Iterable[TargetT]:
         pass
 
+    @property
+    @abstractmethod
+    def names(self) -> Iterable[str]:
+        pass
+
     @abstractmethod
-    def __iter__(self) -> Iterator[Tuple[ExampleT, TargetT]]:
+    def __iter__(self) -> Iterator[Tuple[ExampleT, TargetT, str]]:
         pass
 
     @abstractmethod
@@ -45,7 +51,10 @@ class Dataset(Generic[ExampleT, TargetT], Iterable[Tuple[ExampleT, TargetT]], AB
         self, example_transformer: Callable[[ExampleT], ExampleU], target_transformer: Callable[[TargetT], TargetU]
     ) -> "Dataset[ExampleU, TargetU]":
         return GeneratorDataset(
-            self.name, lambda: ((example_transformer(example), target_transformer(target)) for example, target in self)
+            self.name,
+            lambda: (
+                (example_transformer(example), target_transformer(target), name) for example, target, name in self
+            ),
         )
 
     def __str__(self):
@@ -54,16 +63,13 @@ class Dataset(Generic[ExampleT, TargetT], Iterable[Tuple[ExampleT, TargetT]], AB
     __repr__ = __str__
 
     def check_consistency(self):
-        targets, examples = self.targets, self.examples
-        if isinstance(targets, list) and isinstance(examples, list):
-            assert len(targets) == len(examples)
-        assert ilen(targets) == ilen(examples)
+        assert smart_len(self.targets) == smart_len(self.examples) == smart_len(self.names)
 
 
 class LazyUnzipper:
-    def __init__(self, iterator: Iterator[Tuple]):
+    def __init__(self, iterator: Iterator[Tuple], n: int):
         self._iterator = iterator
-        self._memory = [deque(), deque()]
+        self._memory = [deque() for _ in range(n)]
 
     def empty(self, i: int):
         return self._iterator is None and not self._memory[i]
@@ -77,7 +83,9 @@ class LazyUnzipper:
         else:
             try:
                 elements = next(self._iterator)
-                self._memory[1 - i].append(elements[1 - i])
+                for k in range(len(elements)):
+                    if k != i:
+                        self._memory[k].append(elements[k])
                 yield elements[i]
             except StopIteration:
                 self._iterator = None
@@ -87,28 +95,35 @@ class LazyDataset(Dataset[ExampleT, TargetT], ABC):
     def __init__(self, name: str):
         super().__init__(name)
-        self._unzipper = LazyUnzipper(iter(self))
+        self._unzipper = None
 
     @property
     def examples(self) -> Iterable[ExampleT]:
-        if self._unzipper.empty(0):
-            self._unzipper = LazyUnzipper(iter(self))
-        return self._unzipper.elements(0)
+        return self._elements(0)
 
     @property
-    def targets(self) -> Iterable[ExampleT]:
-        if self._unzipper.empty(1):
-            self._unzipper = LazyUnzipper(iter(self))
-        return self._unzipper.elements(1)
+    def targets(self) -> Iterable[TargetT]:
+        return self._elements(1)
+
+    @property
+    def names(self) -> Iterable[str]:
+        return self._elements(2)
 
     def __len__(self):
         return ilen(self)
 
+    def _elements(self, i: int) -> Iterable:
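+        # Iterating one component buffers the other two in the unzipper's deques;
+        # rebuild the unzipper once this component's buffer is exhausted.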
+        if self._unzipper is None or self._unzipper.empty(i):
+            self._unzipper = LazyUnzipper(iter(self), 3)
+        return self._unzipper.elements(i)
+
 
 class GeneratorDataset(LazyDataset[ExampleT, TargetT]):
-    def __init__(self, name: str, generator: Callable[[], Iterator[Tuple[ExampleT, TargetT]]]):
+    def __init__(self, name: str, generator: Callable[[], Iterator[Tuple[ExampleT, TargetT, str]]]):
         self.generator = generator
         super().__init__(name)
 
-    def __iter__(self) -> Iterator[Tuple[ExampleT, TargetT]]:
+    def __iter__(self) -> Iterator[Tuple[ExampleT, TargetT, str]]:
         return self.generator()
diff --git a/common/research/common/datasets/image_dataset.py b/common/research/common/datasets/image_dataset.py
index 047633689a5bcbccf2b6052a4fff0a3918f4a4f2..de58c6c7280c05bc5e98ebbae0bc9ab7fa64aebc 100644
--- a/common/research/common/datasets/image_dataset.py
+++ b/common/research/common/datasets/image_dataset.py
@@ -4,16 +4,16 @@ from typing import Iterator, List, Tuple
 from memoized_property import memoized_property
 from more_itertools import ilen
 
-from polystar.common.models.image import Image
+from polystar.common.models.image import Image, load_image
 from research.common.datasets.dataset import Dataset, LazyDataset, TargetT
 
 ImageDataset = Dataset[Image, TargetT]
 
 
 class ImageFileDataset(LazyDataset[Path, TargetT], ABC):
-    def __iter__(self) -> Iterator[Tuple[Path, TargetT]]:
+    def __iter__(self) -> Iterator[Tuple[Path, TargetT, str]]:
         for image_file in self.image_files:
-            yield image_file, self.target_from_image_file(image_file)
+            yield image_file, self.target_from_image_file(image_file), image_file.stem
 
     @abstractmethod
     def target_from_image_file(self, image_file: Path) -> TargetT:
diff --git a/common/research/common/datasets/roco/directory_roco_dataset.py b/common/research/common/datasets/roco/directory_roco_dataset.py
index 5df40213d341fff85c3eadd42ac5fb0c688360d2..1b84b8f9b26f6f1860b5fbc32be11f3f1324b204 100644
--- a/common/research/common/datasets/roco/directory_roco_dataset.py
+++ b/common/research/common/datasets/roco/directory_roco_dataset.py
@@ -10,7 +10,6 @@ class DirectoryROCODataset(ImageDirectoryDataset[ROCOAnnotation]):
         super().__init__(dataset_path / "image", name)
         self.main_dir = dataset_path
         self.annotations_dir: Path = self.main_dir / "image_annotation"
-        self.annotations_dir: Path = self.main_dir / "image_annotation"
 
     def target_from_image_file(self, image_file: Path) -> ROCOAnnotation:
         return ROCOAnnotation.from_xml_file(self.annotations_dir / f"{image_file.stem}.xml")
@@ -20,6 +19,6 @@ class DirectoryROCODataset(ImageDirectoryDataset[ROCOAnnotation]):
         self.images_dir.mkdir()
         self.annotations_dir.mkdir()
 
-    def add(self, image: Image, annotation: ROCOAnnotation):
-        save_image(image, self.images_dir / f"{annotation.name}.jpg")
-        (self.annotations_dir / f"{annotation.name}.xml").write_text(annotation.to_xml())
+    def add(self, image: Image, annotation: ROCOAnnotation, name: str):
+        save_image(image, self.images_dir / f"{name}.jpg")
+        (self.annotations_dir / f"{name}.xml").write_text(annotation.to_xml())
diff --git a/common/research/common/datasets/roco/roco_annotation.py b/common/research/common/datasets/roco/roco_annotation.py
index 4378679ac7ecc7998bf9e916865944f14ffb7cd5..0317bddf201eb3b344f02aab58e412f89142287c 100644
--- a/common/research/common/datasets/roco/roco_annotation.py
+++ b/common/research/common/datasets/roco/roco_annotation.py
@@ -11,8 +11,6 @@ from polystar.common.models.object import Object, ObjectFactory
 
 @dataclass
 class ROCOAnnotation:
-    name: str
-
     objects: List[Object]
     has_rune: bool
 
@@ -23,13 +21,13 @@ class ROCOAnnotation:
     @staticmethod
     def from_xml_file(xml_file: Path) -> "ROCOAnnotation":
         try:
-            return ROCOAnnotation.from_xml_dict(xmltodict.parse(xml_file.read_text())["annotation"], xml_file.stem)
+            return ROCOAnnotation.from_xml_dict(xmltodict.parse(xml_file.read_text())["annotation"])
         except Exception as e:
             logging.exception(f"Error parsing annotation file {xml_file}")
             raise e
 
     @staticmethod
-    def from_xml_dict(xml_dict: Dict, name: str) -> "ROCOAnnotation":
+    def from_xml_dict(xml_dict: Dict) -> "ROCOAnnotation":
         json_objects = xml_dict.get("object", []) or []
         json_objects = json_objects if isinstance(json_objects, list) else [json_objects]
         roco_json_objects = [obj_json for obj_json in json_objects if not obj_json["name"].startswith("rune")]
@@ -40,7 +38,6 @@ class ROCOAnnotation:
             has_rune=len(roco_json_objects) != len(json_objects),
             w=int(xml_dict["size"]["width"]),
             h=int(xml_dict["size"]["height"]),
-            name=name,
         )
 
     def to_xml(self) -> str:
diff --git a/common/research/common/datasets/roco/roco_dataset.py b/common/research/common/datasets/roco/roco_dataset.py
index f2fb206c43460631eac2761e300a51f259b86ba5..0d58f5d9e232790e4e74aabe49899d0154418042 100644
--- a/common/research/common/datasets/roco/roco_dataset.py
+++ b/common/research/common/datasets/roco/roco_dataset.py
@@ -1,6 +1,7 @@
-from research.common.datasets.image_dataset import (ImageDataset,
-                                                    ImageFileDataset)
+from polystar.common.models.image import Image
+from research.common.datasets.dataset import Dataset
+from research.common.datasets.image_dataset import ImageFileDataset
 from research.common.datasets.roco.roco_annotation import ROCOAnnotation
 
-ROCODataset = ImageDataset[ROCOAnnotation]
+ROCODataset = Dataset[Image, ROCOAnnotation]
 ROCOFileDataset = ImageFileDataset[ROCOAnnotation]
diff --git a/common/research/common/dataset/roco_dataset_descriptor.py b/common/research/common/datasets/roco/roco_dataset_descriptor.py
similarity index 100%
rename from common/research/common/dataset/roco_dataset_descriptor.py
rename to common/research/common/datasets/roco/roco_dataset_descriptor.py
diff --git a/common/research/common/datasets/roco/zoo/twitch.py b/common/research/common/datasets/roco/zoo/twitch.py
index 68ca46ac3e8254fe5a3b3e86a332e019f3a7dfa8..12eac545f80b9a82b596c73ccac2a172e1b810a3 100644
--- a/common/research/common/datasets/roco/zoo/twitch.py
+++ b/common/research/common/datasets/roco/zoo/twitch.py
@@ -9,16 +9,16 @@ from research.common.datasets.roco.roco_datasets import ROCODatasets
 class TwitchROCODatasets(ROCODatasets):
     directory = TWITCH_DSET_DIR / "v1"
 
-    TWITCH_470149568 = ()
-    TWITCH_470150052 = ()
-    TWITCH_470151286 = ()
-    TWITCH_470152289 = ()
-    TWITCH_470152730 = ()
-    TWITCH_470152838 = ()
-    TWITCH_470153081 = ()
-    TWITCH_470158483 = ()
+    T470149568 = ()
+    T470150052 = ()
+    T470151286 = ()
+    T470152289 = ()
+    T470152730 = ()
+    T470152838 = ()
+    T470153081 = ()
+    T470158483 = ()
 
     @classmethod
     def make_dataset(cls, dataset_name: str, *args: Any) -> DirectoryROCODataset:
-        twitch_id = dataset_name[len("TWITCH_") :]
-        return DirectoryROCODataset(cls.directory / twitch_id, f"T{twitch_id}")
+        twitch_id = dataset_name[len("T") :]
+        return DirectoryROCODataset(cls.directory / twitch_id, dataset_name)
diff --git a/common/research/common/datasets/simple_dataset.py b/common/research/common/datasets/simple_dataset.py
index 1209ae9311ca7da8c8df91c6ff745fc889b969f1..8e6ee1eec2358dfa3e75f2407cb8c2b0b7afbda3 100644
--- a/common/research/common/datasets/simple_dataset.py
+++ b/common/research/common/datasets/simple_dataset.py
@@ -4,10 +4,11 @@ from research.common.datasets.dataset import Dataset, ExampleT, TargetT
 
 
 class SimpleDataset(Dataset[ExampleT, TargetT]):
-    def __init__(self, examples: Iterable[ExampleT], targets: Iterable[TargetT], name: str):
+    def __init__(self, examples: Iterable[ExampleT], targets: Iterable[TargetT], names: Iterable[str], name: str):
         super().__init__(name)
         self._examples = list(examples)
         self._targets = list(targets)
+        self._names = list(names)
         self.check_consistency()
 
     @property
@@ -18,8 +19,12 @@ class SimpleDataset(Dataset[ExampleT, TargetT]):
     def targets(self) -> List[TargetT]:
         return self._targets
 
-    def __iter__(self) -> Iterator[Tuple[ExampleT, TargetT]]:
-        return zip(self.examples, self.targets)
+    @property
+    def names(self) -> List[str]:
+        return self._names
+
+    def __iter__(self) -> Iterator[Tuple[ExampleT, TargetT, str]]:
+        return zip(self.examples, self.targets, self.names)
 
     def __len__(self):
         return len(self.examples)
diff --git a/common/research/common/datasets/union_dataset.py b/common/research/common/datasets/union_dataset.py
index 24f314d2ef35dab56bb420bf1ddac671f572f59b..4967631ee60b3b173c41741ebef188c13b744f06 100644
--- a/common/research/common/datasets/union_dataset.py
+++ b/common/research/common/datasets/union_dataset.py
@@ -1,16 +1,15 @@
 from typing import Iterable, Iterator, Tuple
 
-from polystar.common.models.image import Image
-from research.common.datasets.dataset import ExampleT, LazyDataset, TargetT
-from research.common.datasets.image_dataset import ImageDataset
+from research.common.datasets.dataset import (Dataset, ExampleT, LazyDataset,
+                                              TargetT)
 
 
 class UnionDataset(LazyDataset[ExampleT, TargetT]):
-    def __init__(self, datasets: Iterable[ImageDataset[TargetT]], name: str):
-        super().__init__(name)
+    def __init__(self, datasets: Iterable[Dataset[ExampleT, TargetT]], name: str = None):
        self.datasets = list(datasets)
+        super().__init__(name or "_".join(d.name for d in self.datasets))
 
-    def __iter__(self) -> Iterator[Tuple[Image, TargetT]]:
+    def __iter__(self) -> Iterator[Tuple[ExampleT, TargetT, str]]:
         for dataset in self.datasets:
             yield from dataset
diff --git a/common/research/common/image_pipeline_evaluation/image_pipeline_evaluation_reporter.py b/common/research/common/image_pipeline_evaluation/image_pipeline_evaluation_reporter.py
index 47cacc236e7d46677e2debd7e63151ce4907508d..4e672b2290c644323c25b9b9951b23575f773cfb 100644
--- a/common/research/common/image_pipeline_evaluation/image_pipeline_evaluation_reporter.py
+++ b/common/research/common/image_pipeline_evaluation/image_pipeline_evaluation_reporter.py
@@ -12,10 +12,12 @@ from polystar.common.utils.dataframe import format_df_column, format_df_row, for
 from polystar.common.utils.markdown import MarkdownFile
 from polystar.common.utils.time import create_time_id
 from research.common.constants import DSET_DIR, EVALUATION_DIR
-from research.common.dataset.roco_dataset import ROCODataset
-from research.common.image_pipeline_evaluation.image_pipeline_evaluator import (ClassificationResults,
-                                                                                ImagePipelineEvaluator,
-                                                                                SetClassificationResults)
+from research.common.datasets.roco.roco_dataset import ROCOFileDataset
+from research.common.image_pipeline_evaluation.image_pipeline_evaluator import (
+    ClassificationResults,
+    ImagePipelineEvaluator,
+    SetClassificationResults,
+)
 
 
 @dataclass
@@ -54,17 +56,17 @@
     @staticmethod
     def _report_dataset(
-        mf: MarkdownFile, roco_datasets: List[ROCODataset], dataset_sizes: List[int], labels: List[Any]
+        mf: MarkdownFile, roco_datasets: List[ROCOFileDataset], dataset_sizes: List[int], labels: List[Any]
     ):
         total = len(labels)
         mf.paragraph(f"{total} images")
         df = DataFrame(
             {
-                dataset.dataset_name: Counter(labels[start:end])
+                dataset.name: Counter(labels[start:end])
                 for dataset, start, end in zip(roco_datasets, np.cumsum([0] + dataset_sizes), np.cumsum(dataset_sizes))
             }
         ).fillna(0)
-        df["Total"] = sum([df[d.dataset_name] for d in roco_datasets])
+        df["Total"] = sum([df[d.name] for d in roco_datasets])
         df["Repartition"] = (df["Total"] / total).map("{:.1%}".format)
         mf.table(df)
diff --git a/common/research/common/image_pipeline_evaluation/image_pipeline_evaluator.py b/common/research/common/image_pipeline_evaluation/image_pipeline_evaluator.py
index e47782ef0c2d180d154ffd8e3ab90b1b359d7d86..8c530b8597b92516d5e46bd4bcb90c3a4c1a93b6 100644
--- a/common/research/common/image_pipeline_evaluation/image_pipeline_evaluator.py
+++ b/common/research/common/image_pipeline_evaluation/image_pipeline_evaluator.py
@@ -1,16 +1,18 @@
 import logging
 from dataclasses import dataclass
+from pathlib import Path
 from time import time
-from typing import Any, Dict, Iterable, List, Sequence
+from typing import Any, Dict, Iterable, List, Sequence, Tuple
 
 import numpy as np
-from sklearn.metrics import classification_report, confusion_matrix
-
 from memoized_property import memoized_property
 from polystar.common.image_pipeline.image_pipeline import ImagePipeline
-from polystar.common.models.image import Image
-from research.common.dataset.directory_roco_dataset import DirectoryROCODataset
-from research.common.image_pipeline_evaluation.image_dataset_generator import ImageDatasetGenerator
+from polystar.common.models.image import Image, load_image
+from research.common.datasets.roco.directory_roco_dataset import \
+    DirectoryROCODataset
+from research.robots_at_robots.dataset.armor_value_dataset import \
+    ArmorValueDatasetGenerator
+from sklearn.metrics import classification_report, confusion_matrix
 
 
 @dataclass
@@ -48,23 +50,17 @@ class ImagePipelineEvaluator:
         self,
         train_roco_datasets: List[DirectoryROCODataset],
         test_roco_datasets: List[DirectoryROCODataset],
-        image_dataset_generator: ImageDatasetGenerator,
+        image_dataset_generator: ArmorValueDatasetGenerator,
     ):
         logging.info("Loading data")
         self.train_roco_datasets = train_roco_datasets
         self.test_roco_datasets = test_roco_datasets
-        (
-            self.train_images_paths,
-            self.train_images,
-            self.train_labels,
-            self.train_dataset_sizes,
-        ) = image_dataset_generator.from_roco_datasets(train_roco_datasets)
-        (
-            self.test_images_paths,
-            self.test_images,
-            self.test_labels,
-            self.test_dataset_sizes,
-        ) = image_dataset_generator.from_roco_datasets(test_roco_datasets)
+        (self.train_images_paths, self.train_images, self.train_labels, self.train_dataset_sizes) = load_datasets(
+            train_roco_datasets, image_dataset_generator
+        )
+        (self.test_images_paths, self.test_images, self.test_labels, self.test_dataset_sizes) = load_datasets(
+            test_roco_datasets, image_dataset_generator
+        )
 
     def evaluate_pipelines(self, pipelines: Iterable[ImagePipeline]) -> Dict[str, ClassificationResults]:
         return {str(pipeline): self.evaluate(pipeline) for pipeline in pipelines}
@@ -87,3 +83,15 @@ class ImagePipelineEvaluator:
         preds = pipeline.predict(images)
         mean_time = (time() - t) / len(images)
         return SetClassificationResults(np.asarray(labels), np.asarray(preds), mean_time)
+
+
+def load_datasets(
+    datasets: List[DirectoryROCODataset], image_dataset_generator: ArmorValueDatasetGenerator
+) -> Tuple[List[Path], List[Image], List[Any], List[int]]:
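+    # The generated dataset yields image paths as examples; load them eagerly
+    # here so downstream pipelines receive in-memory images.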
+    dataset_sizes = [len(d) for d in datasets]
+    dataset = image_dataset_generator.from_roco_datasets(datasets)
+    paths, targets = list(dataset.examples), list(dataset.targets)
+    images = list(map(load_image, paths))
+    return paths, images, targets, dataset_sizes
diff --git a/common/research/common/scripts/construct_twith_datasets_from_manual_annotation.py b/common/research/common/scripts/construct_twith_datasets_from_manual_annotation.py
index 767b0ca174e05ce07f4f1925f8a931f65a259179..459ae3d64366442ed1c8457bf0fcabd76d42d30d 100644
--- a/common/research/common/scripts/construct_twith_datasets_from_manual_annotation.py
+++ b/common/research/common/scripts/construct_twith_datasets_from_manual_annotation.py
@@ -4,10 +4,10 @@ from shutil import copy, make_archive, move, rmtree
 from research.common.constants import (TWITCH_DSET_DIR,
                                        TWITCH_DSET_ROBOTS_VIEWS_DIR,
                                        TWITCH_ROBOTS_VIEWS_DIR)
-from research.common.dataset.roco_dataset_descriptor import \
-    make_markdown_dataset_report
 from research.common.datasets.roco.directory_roco_dataset import \
     DirectoryROCODataset
+from research.common.datasets.roco.roco_dataset_descriptor import \
+    make_markdown_dataset_report
 from research.common.scripts.construct_dataset_from_manual_annotation import \
     construct_dataset_from_manual_annotations
 from research.common.scripts.correct_annotations import AnnotationFileCorrector
@@ -26,22 +26,23 @@ def _correct_manual_annotations():
 
 def _extract_runes_images():
     all_twitch_dataset = _get_mixed_dataset()
-    for annotation in all_twitch_dataset.image_annotations:
+    for image_file, annotation, _ in all_twitch_dataset:
         if annotation.has_rune:
-            copy(str(annotation.image_path), str(TWITCH_DSET_DIR / "runes" / annotation.image_path.name))
+            copy(str(image_file), str(TWITCH_DSET_DIR / "runes" / image_file.name))
 
 
 def _separate_twitch_videos():
     all_twitch_dataset = _get_mixed_dataset()
-    for annotation in all_twitch_dataset.image_annotations:
-        video_name = annotation.image_path.name.split("-")[0]
+    for image_file, annotation, _ in all_twitch_dataset:
+        video_name = image_file.name.split("-")[0]
         dset_path = TWITCH_DSET_ROBOTS_VIEWS_DIR / video_name
         images_path = dset_path / "image"
         annotations_path = dset_path / "image_annotation"
         images_path.mkdir(exist_ok=True, parents=True)
         annotations_path.mkdir(exist_ok=True, parents=True)
-        move(str(annotation.image_path), str(images_path / annotation.image_path.name))
-        move(str(annotation.xml_path), str(annotations_path / annotation.xml_path.name))
+        move(str(image_file), str(images_path / image_file.name))
+        xml_name = f"{image_file.stem}.xml"
+        move(str(all_twitch_dataset.annotations_dir / xml_name), str(annotations_path / xml_name))
     if list((TWITCH_DSET_ROBOTS_VIEWS_DIR / "image").glob("*")):
         raise Exception(f"Some images remains unmoved")
     for remaining_file in (TWITCH_DSET_ROBOTS_VIEWS_DIR / "image_annotation").glob("*"):
@@ -53,7 +54,7 @@ def _separate_twitch_videos():
 
 def _make_global_report():
     all_twitch_dataset = _get_mixed_dataset()
-    make_markdown_dataset_report(all_twitch_dataset, all_twitch_dataset.dataset_path)
+    make_markdown_dataset_report(all_twitch_dataset, all_twitch_dataset.main_dir)
 
 
 def _get_mixed_dataset() -> DirectoryROCODataset:
@@ -64,7 +65,7 @@ def _make_separate_reports():
     for video_dset_path in TWITCH_DSET_ROBOTS_VIEWS_DIR.glob("*"):
         if video_dset_path.is_dir():
             twitch_dset = DirectoryROCODataset(video_dset_path, f"TWITCH_{video_dset_path.name}")
-            make_markdown_dataset_report(twitch_dset, twitch_dset.dataset_path)
+            make_markdown_dataset_report(twitch_dset, twitch_dset.main_dir)
 
 
 if __name__ == "__main__":
diff --git a/common/research/common/scripts/create_tensorflow_records.py b/common/research/common/scripts/create_tensorflow_records.py
index 6983ff3302e9e18c6c571e0372b05b07f35054a8..7563a0a0a0d7260a22592dcbe02a151cc4125d30 100644
--- a/common/research/common/scripts/create_tensorflow_records.py
+++ b/common/research/common/scripts/create_tensorflow_records.py
@@ -15,17 +15,16 @@ def create_one_record_per_roco_dset():
 
 
 def create_twitch_records():
     TensorflowRecordFactory.from_datasets(
         [
-            TwitchROCODatasets.TWITCH_470149568,
-            TwitchROCODatasets.TWITCH_470150052,
-            TwitchROCODatasets.TWITCH_470151286,
-            TwitchROCODatasets.TWITCH_470152289,
-            TwitchROCODatasets.TWITCH_470152730,
+            TwitchROCODatasets.T470149568,
+            TwitchROCODatasets.T470150052,
+            TwitchROCODatasets.T470151286,
+            TwitchROCODatasets.T470152289,
+            TwitchROCODatasets.T470152730,
         ],
         "Twitch_Train_",
     )
     TensorflowRecordFactory.from_datasets(
-        [TwitchROCODatasets.TWITCH_470152838, TwitchROCODatasets.TWITCH_470153081, TwitchROCODatasets.TWITCH_470158483],
-        "Twitch_Test_",
+        [TwitchROCODatasets.T470152838, TwitchROCODatasets.T470153081, TwitchROCODatasets.T470158483], "Twitch_Test_",
     )
diff --git a/common/research/common/scripts/improve_roco_by_zooming.py b/common/research/common/scripts/improve_roco_by_zooming.py
index d38ddf1b411107cc98405b478730f653b944027c..ed7d40ae0f31e940084970fd773044ba9b18f310 100644
--- a/common/research/common/scripts/improve_roco_by_zooming.py
+++ b/common/research/common/scripts/improve_roco_by_zooming.py
@@ -1,3 +1,4 @@
+from polystar.common.utils.tqdm import smart_tqdm
 from research.common.dataset.improvement.zoom import Zoomer
 from research.common.dataset.perturbations.image_modifiers.brightness import \
     BrightnessModifier
@@ -9,7 +10,6 @@ from research.common.dataset.perturbations.perturbator import ImagePerturbator
 from research.common.datasets.roco.roco_dataset import ROCOFileDataset
 from research.common.datasets.roco.zoo.dji_zoomed import DJIROCOZoomedDatasets
 from research.common.datasets.roco.zoo.roco_datasets_zoo import ROCODatasetsZoo
-from tqdm import tqdm
 
 
 def improve_dji_roco_dataset_by_zooming_and_perturbating(
@@ -18,10 +18,10 @@ def improve_dji_roco_dataset_by_zooming_and_perturbating(
     zoomed_dset = DJIROCOZoomedDatasets.make_dataset(dset.name)
     zoomed_dset.create()
 
-    for img, annotation in tqdm(dset.open(), desc=f"Processing {dset}", unit="image", total=len(dset)):
-        for zoomed_image, zoomed_annotation in zoomer.zoom(img, annotation):
+    for img, annotation, name in smart_tqdm(dset.open(), desc=f"Processing {dset}", unit="image", total=len(dset)):
+        for zoomed_image, zoomed_annotation, zoomed_name in zoomer.zoom(img, annotation, name):
             zoomed_image = perturbator.perturbate(zoomed_image)
-            zoomed_dset.add(zoomed_image, zoomed_annotation)
+            zoomed_dset.add(zoomed_image, zoomed_annotation, zoomed_name)
 
 
 def improve_all_dji_datasets_by_zooming_and_perturbating(zoomer: Zoomer, perturbator: ImagePerturbator):
diff --git a/common/research/common/scripts/visualize_dataset.py b/common/research/common/scripts/visualize_dataset.py
index 1bdb43a71d8b39107999552d76f692257f752304..eda5a19371d1f641216a69dcb91ddff4ca86fc51 100644
--- a/common/research/common/scripts/visualize_dataset.py
+++ b/common/research/common/scripts/visualize_dataset.py
@@ -6,7 +6,7 @@ from research.common.datasets.roco.zoo.roco_datasets_zoo import ROCODatasetsZoo
 def visualize_dataset(dataset: ROCODataset, n_images: int):
     viewer = PltResultViewer(dataset.name)
 
-    for i, (image, annotation) in enumerate(dataset, 1):
+    for i, (image, annotation, name) in enumerate(dataset, 1):
         viewer.display_image_with_objects(image, annotation.objects)
 
         if i == n_images:
diff --git a/common/tests/common/unittests/datasets/test_dataset.py b/common/tests/common/unittests/datasets/test_dataset.py
index fff885d3cf565a489fa783dfadf865173811d0b7..cf34bbaa6e5d5e8a238f787c30be254edca4b316 100644
--- a/common/tests/common/unittests/datasets/test_dataset.py
+++ b/common/tests/common/unittests/datasets/test_dataset.py
@@ -11,23 +11,30 @@ class TestDataset(TestCase):
         str_str_dataset: Dataset[str, str] = dataset.transform(str, str)
 
-        self.assertEqual([("0", "8"), ("1", "9"), ("2", "10"), ("3", "11")], list(str_str_dataset))
+        self.assertEqual(
+            [("0", "8", "data_1"), ("1", "9", "data_2"), ("2", "10", "data_3"), ("3", "11", "data_4")],
+            list(str_str_dataset),
+        )
 
     def test_transform_examples(self):
         dataset = _make_fake_dataset()
 
         str_int_dataset: Dataset[str, int] = dataset.transform_examples(str)
 
-        self.assertEqual([("0", 8), ("1", 9), ("2", 10), ("3", 11)], list(str_int_dataset))
+        self.assertEqual(
+            [("0", 8, "data_1"), ("1", 9, "data_2"), ("2", 10, "data_3"), ("3", 11, "data_4")], list(str_int_dataset)
+        )
 
     def test_transform_not_exhaustible(self):
         dataset = _make_fake_dataset()
 
         str_int_dataset: Dataset[str, float] = dataset.transform_examples(str)
 
-        self.assertEqual([("0", 8), ("1", 9), ("2", 10), ("3", 11)], list(str_int_dataset))
-        self.assertEqual([("0", 8), ("1", 9), ("2", 10), ("3", 11)], list(str_int_dataset))
-        self.assertEqual([("0", 8), ("1", 9), ("2", 10), ("3", 11)], list(str_int_dataset))
+        items = [("0", 8, "data_1"), ("1", 9, "data_2"), ("2", 10, "data_3"), ("3", 11, "data_4")]
+
+        self.assertEqual(items, list(str_int_dataset))
+        self.assertEqual(items, list(str_int_dataset))
+        self.assertEqual(items, list(str_int_dataset))
 
 
 class TestSimpleDataset(TestCase):
@@ -36,11 +43,12 @@ class TestSimpleDataset(TestCase):
 
         self.assertEqual([0, 1, 2, 3], dataset.examples)
         self.assertEqual([8, 9, 10, 11], dataset.targets)
+        self.assertEqual(["data_1", "data_2", "data_3", "data_4"], dataset.names)
 
     def test_iter(self):
         dataset = _make_fake_dataset()
 
-        self.assertEqual([(0, 8), (1, 9), (2, 10), (3, 11)], list(dataset))
+        self.assertEqual([(0, 8, "data_1"), (1, 9, "data_2"), (2, 10, "data_3"), (3, 11, "data_4")], list(dataset))
 
     def test_len(self):
         dataset = _make_fake_dataset()
@@ -49,14 +57,14 @@ class TestSimpleDataset(TestCase):
 
     def test_consistency(self):
         with self.assertRaises(AssertionError):
-            SimpleDataset([0, 1], [8, 9, 10, 11], "fake")
+            SimpleDataset([0, 1], [8, 9, 10, 11], ["a", "b"], "fake")
 
 
 class FakeLazyDataset(LazyDataset):
     def __init__(self):
         super().__init__("fake")
 
-    __iter__ = MagicMock(side_effect=lambda *args: iter([(1, 1), (2, 4), (3, 9)]))
+    __iter__ = MagicMock(side_effect=lambda *args: iter([(1, 1, "data_1"), (2, 4, "data_2"), (3, 9, "data_3")]))
 
 
 class TestLazyDataset(TestCase):
@@ -65,7 +73,10 @@ class TestLazyDataset(TestCase):
 
         self.assertEqual([1, 2, 3], list(dataset.examples))
         self.assertEqual([1, 4, 9], list(dataset.targets))
-        self.assertEqual([(1, 1), (2, 4), (3, 9)], list(zip(dataset.examples, dataset.targets)))
+        self.assertEqual(
+            [(1, 1, "data_1"), (2, 4, "data_2"), (3, 9, "data_3")],
+            list(zip(dataset.examples, dataset.targets, dataset.names)),
+        )
 
     def test_properties_laziness(self):
         FakeLazyDataset.__iter__.reset_mock()
@@ -80,6 +91,11 @@ class TestLazyDataset(TestCase):
         list(zip(dataset.examples, dataset.targets))
         FakeLazyDataset.__iter__.assert_called_once()
 
+        FakeLazyDataset.__iter__.reset_mock()
+
+        list(dataset.names)
+        FakeLazyDataset.__iter__.assert_not_called()
+
 
 def _make_fake_dataset() -> Dataset[int, int]:
-    return SimpleDataset([0, 1, 2, 3], [8, 9, 10, 11], "fake")
+    return SimpleDataset([0, 1, 2, 3], [8, 9, 10, 11], [f"data_{i}" for i in range(1, 5)], "fake")