From 368827297693d0131cb0de92e424231a83c33616 Mon Sep 17 00:00:00 2001
From: Mathieu Beligon <mathieu@feedly.com>
Date: Thu, 24 Sep 2020 18:50:57 +0200
Subject: [PATCH] [common] (datasets) refactor with builder paradigm

---
 .../research/common/datasets_v3/__init__.py   |  0
 common/research/common/datasets_v3/dataset.py | 17 ++++
 .../common/datasets_v3/dataset_builder.py     | 60 +++++++++++++++++++
 .../common/datasets_v3/filter_dataset.py      | 25 ++++++++
 .../common/datasets_v3/image_dataset.py       | 11 ++++
 .../datasets_v3/image_file_dataset_builder.py | 34 +++++++++++
 .../common/datasets_v3/lazy_dataset.py        | 17 ++++++
 .../common/datasets_v3/roco/__init__.py       |  0
 .../common/datasets_v3/roco/roco_dataset.py   | 12 ++++
 .../datasets_v3/roco/roco_dataset_builder.py  | 14 +++++
 .../roco/roco_dataset_descriptor.py           | 21 +++----
 .../common/datasets_v3/roco/roco_datasets.py  | 56 +++++++++++++++++
 .../common/datasets_v3/roco/zoo/__init__.py   |  0
 .../common/datasets_v3/roco/zoo/dji.py        | 15 +++++
 .../common/datasets_v3/roco/zoo/dji_zoomed.py | 20 ++++++
 .../datasets_v3/roco/zoo/roco_dataset_zoo.py  | 18 ++++++
 .../common/datasets_v3/roco/zoo/twitch.py     | 31 ++++++++++
 .../common/datasets_v3/transform_dataset.py   | 24 ++++++++
 .../common/datasets_v3/union_dataset.py       | 32 ++++++++++
 19 files changed, 397 insertions(+), 10 deletions(-)
 create mode 100644 common/research/common/datasets_v3/__init__.py
 create mode 100644 common/research/common/datasets_v3/dataset.py
 create mode 100644 common/research/common/datasets_v3/dataset_builder.py
 create mode 100644 common/research/common/datasets_v3/filter_dataset.py
 create mode 100644 common/research/common/datasets_v3/image_dataset.py
 create mode 100644 common/research/common/datasets_v3/image_file_dataset_builder.py
 create mode 100644 common/research/common/datasets_v3/lazy_dataset.py
 create mode 100644 common/research/common/datasets_v3/roco/__init__.py
 create mode 100644 common/research/common/datasets_v3/roco/roco_dataset.py
 create mode 100644 common/research/common/datasets_v3/roco/roco_dataset_builder.py
 rename common/research/common/{datasets => datasets_v3}/roco/roco_dataset_descriptor.py (75%)
 create mode 100644 common/research/common/datasets_v3/roco/roco_datasets.py
 create mode 100644 common/research/common/datasets_v3/roco/zoo/__init__.py
 create mode 100644 common/research/common/datasets_v3/roco/zoo/dji.py
 create mode 100644 common/research/common/datasets_v3/roco/zoo/dji_zoomed.py
 create mode 100644 common/research/common/datasets_v3/roco/zoo/roco_dataset_zoo.py
 create mode 100644 common/research/common/datasets_v3/roco/zoo/twitch.py
 create mode 100644 common/research/common/datasets_v3/transform_dataset.py
 create mode 100644 common/research/common/datasets_v3/union_dataset.py

diff --git a/common/research/common/datasets_v3/__init__.py b/common/research/common/datasets_v3/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/common/research/common/datasets_v3/dataset.py b/common/research/common/datasets_v3/dataset.py
new file mode 100644
index 0000000..4fd2cb8
--- /dev/null
+++ b/common/research/common/datasets_v3/dataset.py
@@ -0,0 +1,17 @@
+from typing import Iterator, List, Tuple
+
+from research.common.datasets_v3.lazy_dataset import ExampleT, LazyDataset, TargetT
+
+
+class Dataset(LazyDataset[ExampleT, TargetT]):
+    def __init__(self, examples: List[ExampleT], targets: List[TargetT], names: List[str], name: str):
+        super().__init__(name)
+        self.names = names
+        self.targets = targets
+        self.examples = examples
+
+    def __iter__(self) -> Iterator[Tuple[ExampleT, TargetT, str]]:
+        return zip(self.examples, self.targets, self.names)
+
+    def __len__(self):
+        return len(self.examples)
diff --git a/common/research/common/datasets_v3/dataset_builder.py b/common/research/common/datasets_v3/dataset_builder.py
new file mode 100644
index 0000000..fdc890f
--- /dev/null
+++ b/common/research/common/datasets_v3/dataset_builder.py
@@ -0,0 +1,60 @@
+from typing import Callable, Generic, Iterable
+
+from polystar.common.filters.filter_abc import FilterABC
+from polystar.common.filters.pass_through_filter import PassThroughFilter
+from polystar.common.utils.misc import identity
+from research.common.datasets_v3.dataset import Dataset
+from research.common.datasets_v3.filter_dataset import ExampleU, FilterDataset, TargetU
+from research.common.datasets_v3.lazy_dataset import ExampleT, LazyDataset, TargetT
+from research.common.datasets_v3.transform_dataset import TransformDataset
+
+
+class DatasetBuilder(Generic[ExampleT, TargetT]):
+    def __init__(self, dataset: LazyDataset[ExampleT, TargetT]):
+        self.dataset = dataset
+        self._built = False
+
+    def build_lazy(self) -> LazyDataset[ExampleT, TargetT]:
+        assert not self._built
+        self._built = True
+        return self.dataset
+
+    def build(self) -> Dataset[ExampleT, TargetT]:
+        assert not self._built
+        self._built = True
+        examples, targets, names = zip(*iter(self.dataset))
+        return Dataset(list(examples), list(targets), list(names), self.name)
+
+    def build_examples(self) -> Iterable[ExampleT]:
+        assert not self._built
+        self._built = True
+        for ex, _, _ in self.dataset:
+            yield ex
+
+    def filter_examples(self, examples_filter: FilterABC[ExampleT]) -> "DatasetBuilder[ExampleT, TargetT]":
+        self.dataset = FilterDataset(self.dataset, examples_filter, PassThroughFilter())
+        return self
+
+    def filter_targets(self, targets_filter: FilterABC[TargetT]) -> "DatasetBuilder[ExampleT, TargetT]":
+        self.dataset = FilterDataset(self.dataset, PassThroughFilter(), targets_filter)
+        return self
+
+    def transform_examples(
+        self, example_transformer: Callable[[ExampleT], ExampleU]
+    ) -> "DatasetBuilder[ExampleU, TargetT]":
+        self.dataset = TransformDataset(self.dataset, example_transformer, identity)
+        return self
+
+    def transform_targets(
+        self, target_transformer: Callable[[TargetT], TargetU]
+    ) -> "DatasetBuilder[ExampleT, TargetU]":
+        self.dataset = TransformDataset(self.dataset, identity, target_transformer)
+        return self
+
+    @property
+    def name(self) -> str:
+        return self.dataset.name
+
+    @name.setter
+    def name(self, name: str):
+        self.dataset.name = name
diff --git a/common/research/common/datasets_v3/filter_dataset.py b/common/research/common/datasets_v3/filter_dataset.py
new file mode 100644
index 0000000..415b3cb
--- /dev/null
+++ b/common/research/common/datasets_v3/filter_dataset.py
@@ -0,0 +1,25 @@
+from typing import Iterator, Tuple, TypeVar
+
+from polystar.common.filters.filter_abc import FilterABC
+from research.common.datasets_v3.lazy_dataset import ExampleT, LazyDataset, TargetT
+
+ExampleU = TypeVar("ExampleU")
+TargetU = TypeVar("TargetU")
+
+
+class FilterDataset(LazyDataset[ExampleT, TargetT]):
+    def __init__(
+        self,
+        source: LazyDataset[ExampleT, TargetT],
+        examples_filter: FilterABC[ExampleT],
+        targets_filter: FilterABC[TargetT],
+    ):
+        super().__init__(source.name)
+        self.targets_filter = targets_filter
+        self.examples_filter = examples_filter
+        self.source = source
+
+    def __iter__(self) -> Iterator[Tuple[ExampleT, TargetT, str]]:
+        for example, target, name in self.source:
+            if self.examples_filter.validate_single(example) and self.targets_filter.validate_single(target):
+                yield example, target, name
diff --git a/common/research/common/datasets_v3/image_dataset.py b/common/research/common/datasets_v3/image_dataset.py
new file mode 100644
index 0000000..b7bf27e
--- /dev/null
+++ b/common/research/common/datasets_v3/image_dataset.py
@@ -0,0 +1,11 @@
+from pathlib import Path
+
+from polystar.common.models.image import Image
+from research.common.datasets_v3.dataset import Dataset
+from research.common.datasets_v3.lazy_dataset import LazyDataset, TargetT
+
+LazyFileDataset = LazyDataset[Path, TargetT]
+FileDataset = Dataset[Path, TargetT]
+
+LazyImageDataset = LazyDataset[Image, TargetT]
+ImageDataset = Dataset[Image, TargetT]
diff --git a/common/research/common/datasets_v3/image_file_dataset_builder.py b/common/research/common/datasets_v3/image_file_dataset_builder.py
new file mode 100644
index 0000000..fbf86df
--- /dev/null
+++ b/common/research/common/datasets_v3/image_file_dataset_builder.py
@@ -0,0 +1,34 @@
+from pathlib import Path
+from typing import Callable, Iterable, Iterator, Tuple
+
+from polystar.common.models.image import Image, load_image
+from research.common.datasets_v3.dataset_builder import DatasetBuilder
+from research.common.datasets_v3.lazy_dataset import LazyDataset, TargetT
+
+
+class LazyFileDataset(LazyDataset[Path, TargetT]):
+    def __init__(self, files: Iterable[Path], target_from_file: Callable[[Path], TargetT], name: str):
+        super().__init__(name)
+        self.target_from_file = target_from_file
+        self.files = sorted(files)
+
+    def __iter__(self) -> Iterator[Tuple[Path, TargetT, str]]:
+        for file in self.files:
+            yield file, self.target_from_file(file), file.stem
+
+    def __len__(self):
+        return len(self.files)
+
+
+class FileDatasetBuilder(DatasetBuilder[Path, TargetT]):
+    def __init__(self, dataset: LazyFileDataset):
+        super().__init__(dataset)
+
+    def to_images(self) -> DatasetBuilder[Image, TargetT]:
+        return self.transform_examples(load_image)
+
+
+class DirectoryDatasetBuilder(FileDatasetBuilder):
+    def __init__(self, directory: Path, target_from_file: Callable[[Path], TargetT], name: str, extension: str = "jpg"):
+        super().__init__(LazyFileDataset(directory.glob(f"*.{extension}"), target_from_file, name))
+        self.images_dir = directory
diff --git a/common/research/common/datasets_v3/lazy_dataset.py b/common/research/common/datasets_v3/lazy_dataset.py
new file mode 100644
index 0000000..c240969
--- /dev/null
+++ b/common/research/common/datasets_v3/lazy_dataset.py
@@ -0,0 +1,17 @@
+from abc import ABC, abstractmethod
+from typing import Generic, Iterable, Iterator, Tuple, TypeVar
+
+ExampleT = TypeVar("ExampleT")
+TargetT = TypeVar("TargetT")
+
+
+class LazyDataset(Generic[ExampleT, TargetT], Iterable[Tuple[ExampleT, TargetT, str]], ABC):
+    def __init__(self, name: str):
+        self.name = name
+
+    @abstractmethod
+    def __iter__(self) -> Iterator[Tuple[ExampleT, TargetT, str]]:
+        pass
+
+    def __len__(self):
+        raise NotImplementedError()
diff --git a/common/research/common/datasets_v3/roco/__init__.py b/common/research/common/datasets_v3/roco/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/common/research/common/datasets_v3/roco/roco_dataset.py b/common/research/common/datasets_v3/roco/roco_dataset.py
new file mode 100644
index 0000000..e365294
--- /dev/null
+++ b/common/research/common/datasets_v3/roco/roco_dataset.py
@@ -0,0 +1,12 @@
+from pathlib import Path
+
+from polystar.common.models.image import Image
+from research.common.datasets.roco.roco_annotation import ROCOAnnotation
+from research.common.datasets_v3.dataset import Dataset
+from research.common.datasets_v3.lazy_dataset import LazyDataset
+
+LazyROCOFileDataset = LazyDataset[Path, ROCOAnnotation]
+ROCOFileDataset = Dataset[Path, ROCOAnnotation]
+
+LazyROCODataset = LazyDataset[Image, ROCOAnnotation]
+ROCODataset = Dataset[Image, ROCOAnnotation]
diff --git a/common/research/common/datasets_v3/roco/roco_dataset_builder.py b/common/research/common/datasets_v3/roco/roco_dataset_builder.py
new file mode 100644
index 0000000..ce09a0d
--- /dev/null
+++ b/common/research/common/datasets_v3/roco/roco_dataset_builder.py
@@ -0,0 +1,14 @@
+from pathlib import Path
+
+from research.common.datasets.roco.roco_annotation import ROCOAnnotation
+from research.common.datasets_v3.image_file_dataset_builder import DirectoryDatasetBuilder
+
+
+class ROCODatasetBuilder(DirectoryDatasetBuilder):
+    def __init__(self, directory: Path, name: str, extension: str = "jpg"):
+        super().__init__(directory / "image", self.roco_annotation_from_image_file, name, extension)
+        self.annotations_dir = directory / "image_annotation"
+        self.main_dir = directory
+
+    def roco_annotation_from_image_file(self, image_file: Path) -> ROCOAnnotation:
+        return ROCOAnnotation.from_xml_file(self.annotations_dir / f"{image_file.stem}.xml")
diff --git a/common/research/common/datasets/roco/roco_dataset_descriptor.py b/common/research/common/datasets_v3/roco/roco_dataset_descriptor.py
similarity index 75%
rename from common/research/common/datasets/roco/roco_dataset_descriptor.py
rename to common/research/common/datasets_v3/roco/roco_dataset_descriptor.py
index 2c790fd..f72ff11 100644
--- a/common/research/common/datasets/roco/roco_dataset_descriptor.py
+++ b/common/research/common/datasets_v3/roco/roco_dataset_descriptor.py
@@ -3,12 +3,12 @@
 from pathlib import Path
 from typing import Dict
 
 from pandas import DataFrame
+from tqdm import tqdm
+
 from polystar.common.models.object import Armor, ObjectType
 from polystar.common.utils.markdown import MarkdownFile
-from research.common.datasets.roco.roco_dataset import ROCODataset
-from research.common.datasets.roco.zoo.roco_datasets_zoo import ROCODatasetsZoo
-from research.common.datasets.union_dataset import UnionDataset
-from tqdm import tqdm
+from research.common.datasets_v3.roco.roco_dataset import LazyROCOFileDataset
+from research.common.datasets_v3.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo
 
 @dataclass
@@ -23,13 +23,13 @@ class ROCODatasetStats:
     armors_color2num2count: Dict[str, Dict[int, int]] = field(default_factory=dict)
 
     @staticmethod
-    def from_dataset(dataset: ROCODataset) -> "ROCODatasetStats":
+    def from_dataset(dataset: LazyROCOFileDataset) -> "ROCODatasetStats":
         rv = ROCODatasetStats()
         colors = ["red", "grey", "blue", "total"]
         rv.armors_color2num2count = {c: {n: 0 for n in range(10)} for c in colors}
         for c in colors:
             rv.armors_color2num2count[c]["total"] = 0
-        for annotation in tqdm(dataset.targets, desc=dataset.name, unit="frame", total=len(dataset)):
+        for (_, annotation, _) in tqdm(dataset, desc=dataset.name, unit="frame"):
             rv.n_images += 1
             rv.n_runes += annotation.has_rune
             for obj in annotation.objects:
@@ -46,7 +46,7 @@ class ROCODatasetStats:
         return rv
 
 
-def make_markdown_dataset_report(dataset: ROCODataset, report_dir: Path):
+def make_markdown_dataset_report(dataset: LazyROCOFileDataset, report_dir: Path):
     report_path = report_dir / f"dset_{dataset.name}_report.md"
 
     stats = ROCODatasetStats.from_dataset(dataset)
@@ -67,7 +67,8 @@ def make_markdown_dataset_report(dataset: ROCODataset, report_dir: Path):
 
 
 if __name__ == "__main__":
-    for datasets in ROCODatasetsZoo():
-        make_markdown_dataset_report(UnionDataset(datasets, datasets.name), datasets.directory)
+    dset = ROCODatasetsZoo.DJI.FINAL
+    for datasets in ROCODatasetsZoo:
+        make_markdown_dataset_report(datasets.union(), datasets.datasets_dir())
         for dset in datasets:
-            make_markdown_dataset_report(dset, dset.main_dir)
+            make_markdown_dataset_report(dset.lazy_files(), dset.main_dir)
diff --git a/common/research/common/datasets_v3/roco/roco_datasets.py b/common/research/common/datasets_v3/roco/roco_datasets.py
new file mode 100644
index 0000000..7c7767d
--- /dev/null
+++ b/common/research/common/datasets_v3/roco/roco_datasets.py
@@ -0,0 +1,56 @@
+from abc import abstractmethod
+from enum import Enum
+from pathlib import Path
+from typing import Iterator
+
+from polystar.common.utils.str_utils import snake2camel
+from research.common.datasets.roco.roco_annotation import ROCOAnnotation
+from research.common.datasets_v3.roco.roco_dataset import (
+    LazyROCODataset,
+    LazyROCOFileDataset,
+    ROCODataset,
+    ROCOFileDataset,
+)
+from research.common.datasets_v3.roco.roco_dataset_builder import ROCODatasetBuilder
+from research.common.datasets_v3.union_dataset import UnionLazyDataset
+
+
+class ROCODatasets(Enum):
+    def __init__(self, dataset_dir_name: str, dataset_name: str = None):
+        self.dataset_name = dataset_name or snake2camel(self.name)
+        self._dataset_dir_name = dataset_dir_name
+
+    def lazy(self) -> LazyROCODataset:
+        return self._dataset_builder.to_images().build_lazy()
+
+    def lazy_files(self) -> LazyROCOFileDataset:
+        return self._dataset_builder.build_lazy()
+
+    def dataset(self) -> ROCODataset:
+        return self._dataset_builder.to_images().build()
+
+    def files_dataset(self) -> ROCOFileDataset:
+        return self._dataset_builder.build()
+
+    @property
+    def main_dir(self):
+        return self.datasets_dir() / self._dataset_dir_name
+
+    @property
+    def _dataset_builder(self) -> ROCODatasetBuilder:
+        return ROCODatasetBuilder(self.main_dir, self.dataset_name)
+
+    @classmethod
+    @abstractmethod
+    def datasets_dir(cls) -> Path:  # FIXME: in python 3.7, we can define a class var using the _ignore_ attribute
+        pass
+
+    def __iter__(self) -> Iterator["ROCODatasets"]:  # needed for PyCharm typing
+        return iter(type(self))  # iterate the enum members instead of recursing
+
+    @classmethod
+    def union(cls) -> UnionLazyDataset[Path, ROCOAnnotation]:
+        return UnionLazyDataset((d.lazy_files() for d in cls), cls.datasets_name)
+
+    def __init_subclass__(cls, **kwargs):
+        cls.datasets_name = cls.__name__.replace("Datasets", "").replace("ROCO", "")
diff --git a/common/research/common/datasets_v3/roco/zoo/__init__.py b/common/research/common/datasets_v3/roco/zoo/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/common/research/common/datasets_v3/roco/zoo/dji.py b/common/research/common/datasets_v3/roco/zoo/dji.py
new file mode 100644
index 0000000..966eb19
--- /dev/null
+++ b/common/research/common/datasets_v3/roco/zoo/dji.py
@@ -0,0 +1,15 @@
+from pathlib import Path
+
+from research.common.constants import DJI_ROCO_DSET_DIR
+from research.common.datasets_v3.roco.roco_datasets import ROCODatasets
+
+
+class DJIROCODatasets(ROCODatasets):
+    CENTRAL_CHINA = "robomaster_Central China Regional Competition"
+    NORTH_CHINA = "robomaster_North China Regional Competition"
+    SOUTH_CHINA = "robomaster_South China Regional Competition"
+    FINAL = "robomaster_Final Tournament"
+
+    @classmethod
+    def datasets_dir(cls) -> Path:
+        return DJI_ROCO_DSET_DIR
diff --git a/common/research/common/datasets_v3/roco/zoo/dji_zoomed.py b/common/research/common/datasets_v3/roco/zoo/dji_zoomed.py
new file mode 100644
index 0000000..2e185d3
--- /dev/null
+++ b/common/research/common/datasets_v3/roco/zoo/dji_zoomed.py
@@ -0,0 +1,20 @@
+from enum import auto
+from pathlib import Path
+
+from polystar.common.utils.str_utils import snake2camel
+from research.common.constants import DJI_ROCO_ZOOMED_DSET_DIR
+from research.common.datasets_v3.roco.roco_datasets import ROCODatasets
+
+
+class DJIROCOZoomedDatasets(ROCODatasets):
+    def __init__(self, _):
+        super().__init__(self.name.lower(), f"{snake2camel(self.name)}ZoomedV2")
+
+    CENTRAL_CHINA = auto()
+    NORTH_CHINA = auto()
+    SOUTH_CHINA = auto()
+    FINAL = auto()
+
+    @classmethod
+    def datasets_dir(cls) -> Path:
+        return DJI_ROCO_ZOOMED_DSET_DIR
diff --git a/common/research/common/datasets_v3/roco/zoo/roco_dataset_zoo.py b/common/research/common/datasets_v3/roco/zoo/roco_dataset_zoo.py
new file mode 100644
index 0000000..4e1a3ba
--- /dev/null
+++ b/common/research/common/datasets_v3/roco/zoo/roco_dataset_zoo.py
@@ -0,0 +1,18 @@
+from typing import Iterable, Type
+
+from research.common.datasets_v3.roco.roco_datasets import ROCODatasets
+from research.common.datasets_v3.roco.zoo.dji import DJIROCODatasets
+from research.common.datasets_v3.roco.zoo.dji_zoomed import DJIROCOZoomedDatasets
+from research.common.datasets_v3.roco.zoo.twitch import TwitchROCODatasets
+
+
+class ROCODatasetsZoo(Iterable[Type[ROCODatasets]]):
+    DJI_ZOOMED = DJIROCOZoomedDatasets
+    DJI = DJIROCODatasets
+    TWITCH = TwitchROCODatasets
+
+    def __iter__(self):
+        return iter((self.DJI, self.DJI_ZOOMED, self.TWITCH))
+
+
+ROCODatasetsZoo = ROCODatasetsZoo()
diff --git a/common/research/common/datasets_v3/roco/zoo/twitch.py b/common/research/common/datasets_v3/roco/zoo/twitch.py
new file mode 100644
index 0000000..013d7e4
--- /dev/null
+++ b/common/research/common/datasets_v3/roco/zoo/twitch.py
@@ -0,0 +1,31 @@
+from enum import auto
+from pathlib import Path
+
+from research.common.constants import TWITCH_DSET_DIR
+from research.common.datasets_v3.roco.roco_datasets import ROCODatasets
+
+
+class TwitchROCODatasets(ROCODatasets):
+    def __init__(self, _):
+        super().__init__(self.twitch_id)
+
+    T470149568 = auto()
+    T470150052 = auto()
+    T470151286 = auto()
+    T470152289 = auto()
+    T470152730 = auto()
+    T470152838 = auto()
+    T470153081 = auto()
+    T470158483 = auto()
+
+    @classmethod
+    def datasets_dir(cls) -> Path:
+        return TWITCH_DSET_DIR / "v1"
+
+    @property
+    def twitch_id(self) -> str:
+        return self.name[len("T") :]
+
+    @property
+    def video_url(self) -> str:
+        return f"https://www.twitch.tv/videos/{self.twitch_id}"
diff --git a/common/research/common/datasets_v3/transform_dataset.py b/common/research/common/datasets_v3/transform_dataset.py
new file mode 100644
index 0000000..8b5fcb8
--- /dev/null
+++ b/common/research/common/datasets_v3/transform_dataset.py
@@ -0,0 +1,24 @@
+from typing import Callable, Iterator, Tuple
+
+from research.common.datasets_v3.filter_dataset import ExampleU, TargetU
+from research.common.datasets_v3.lazy_dataset import ExampleT, LazyDataset, TargetT
+
+
+class TransformDataset(LazyDataset[ExampleU, TargetU]):
+    def __init__(
+        self,
+        source: LazyDataset[ExampleT, TargetT],
+        example_transformer: Callable[[ExampleT], ExampleU],
+        target_transformer: Callable[[TargetT], TargetU],
+    ):
+        self.target_transformer = target_transformer
+        self.example_transformer = example_transformer
+        self.source = source
+        super().__init__(source.name)
+
+    def __iter__(self) -> Iterator[Tuple[ExampleU, TargetU, str]]:
+        for example, target, name in self.source:
+            yield self.example_transformer(example), self.target_transformer(target), name
+
+    def __len__(self):
+        return len(self.source)
diff --git a/common/research/common/datasets_v3/union_dataset.py b/common/research/common/datasets_v3/union_dataset.py
new file mode 100644
index 0000000..ddbd437
--- /dev/null
+++ b/common/research/common/datasets_v3/union_dataset.py
@@ -0,0 +1,32 @@
+from typing import Iterable, Iterator, List, Tuple
+
+from research.common.datasets_v3.dataset import Dataset
+from research.common.datasets_v3.lazy_dataset import ExampleT, LazyDataset, TargetT
+
+
+class UnionLazyDataset(LazyDataset[ExampleT, TargetT]):
+    def __init__(self, datasets: Iterable[LazyDataset[ExampleT, TargetT]], name: str = None):
+        self.datasets = list(datasets)
+        super().__init__(name or _name_from_union(self.datasets))
+
+    def __iter__(self) -> Iterator[Tuple[ExampleT, TargetT, str]]:
+        for dataset in self.datasets:
+            yield from dataset
+
+    def __len__(self):
+        return sum(map(len, self.datasets))
+
+
+class UnionDataset(Dataset[ExampleT, TargetT]):
+    def __init__(self, datasets: Iterable[Dataset[ExampleT, TargetT]], name: str = None):
+        self.datasets = list(datasets)
+        super().__init__(
+            sum((d.examples for d in self.datasets), []),
+            sum((d.targets for d in self.datasets), []),
+            sum((d.names for d in self.datasets), []),
+            name or _name_from_union(self.datasets),
+        )
+
+
+def _name_from_union(datasets: List[LazyDataset]):
+    return "_".join(d.name for d in datasets)
-- 
GitLab
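
A minimal usage sketch of the builder API introduced by this patch (illustrative only, not part of the diff; it assumes the dataset directories referenced by research.common.constants exist locally):

    from research.common.datasets_v3.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo

    # lazily iterate (image file, annotation, name) triples of a single Twitch recording
    for image_file, annotation, name in ROCODatasetsZoo.TWITCH.T470149568.lazy_files():
        print(name, image_file.name, len(annotation.objects))

    # or materialize a whole DJI competition, loading annotations eagerly
    final = ROCODatasetsZoo.DJI.FINAL.files_dataset()
    print(final.name, len(final))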