diff --git a/dataset/tf_records/.gitignore b/dataset/tf_records/.gitignore index b9045fd730d938c05f2a0a14dcc3b4a035e0a633..10a2bd06302019364f5eb4b99012bd455848cef2 100644 --- a/dataset/tf_records/.gitignore +++ b/dataset/tf_records/.gitignore @@ -1 +1 @@ -*.record \ No newline at end of file +*.record diff --git a/dataset/twitch/robots-views/.gitignore b/dataset/twitch/robots-views/.gitignore index c96a04f008ee21e260b28f7701595ed59e2839e3..d6b7ef32c8478a48c3994dcadc86837f4371184d 100644 --- a/dataset/twitch/robots-views/.gitignore +++ b/dataset/twitch/robots-views/.gitignore @@ -1,2 +1,2 @@ * -!.gitignore \ No newline at end of file +!.gitignore diff --git a/dataset/twitch/runes/.gitignore b/dataset/twitch/runes/.gitignore deleted file mode 100644 index c96a04f008ee21e260b28f7701595ed59e2839e3..0000000000000000000000000000000000000000 --- a/dataset/twitch/runes/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -* -!.gitignore \ No newline at end of file diff --git a/dataset/twitch/aerial-views/.gitignore b/dataset/twitch/v1/runes/.gitignore similarity index 100% rename from dataset/twitch/aerial-views/.gitignore rename to dataset/twitch/v1/runes/.gitignore diff --git a/poetry.lock b/poetry.lock index c6266954f868ad07b49dc8c3a868477c88468c15..0023dc53cc39b8ac2fbfafbce22496a8d0394b41 100644 Binary files a/poetry.lock and b/poetry.lock differ diff --git a/pyproject.toml b/pyproject.toml index 39a134cd2b9d0b8a8987422b0ee983394c07e101..de5e14f2b9d2552000f6af7ebe59859886946c5c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,8 +35,8 @@ pyyaml = "^5.3.1" six = "1.15.0" # https://github.com/googleapis/python-bigquery/issues/70 [tool.poetry.dev-dependencies] -tensorflow = "1.14.x" -tensorflow-estimator = "1.14.x" +tensorflow = "1.15.x" +tensorflow-estimator = "1.15.x" h5py = "<3.0.0" kivy = "^1.11.1" cloudml-hypertune = "^0.1.0-alpha.6" diff --git a/src/polystar/frame_generators/cv2_frame_generator_abc.py b/src/polystar/frame_generators/cv2_frame_generator_abc.py index 906c322991e1271ccfd557ecf26df47eff80b270..4a6c4337111622f6ae3d3b83d959a65e97097664 100644 --- a/src/polystar/frame_generators/cv2_frame_generator_abc.py +++ b/src/polystar/frame_generators/cv2_frame_generator_abc.py @@ -16,6 +16,7 @@ class CV2FrameGeneratorABC(FrameGeneratorABC, ABC): def __enter__(self): self._cap = cv2.VideoCapture(*self._capture_params()) assert self._cap.isOpened() + self._post_opening_operation() def __exit__(self, exc_type, exc_val, exc_tb): self._cap.release() @@ -31,3 +32,6 @@ class CV2FrameGeneratorABC(FrameGeneratorABC, ABC): @abstractmethod def _capture_params(self) -> Iterable[Any]: pass + + def _post_opening_operation(self): + pass diff --git a/src/polystar/frame_generators/fps_video_frame_generator.py b/src/polystar/frame_generators/fps_video_frame_generator.py index 22db099cdabc8fb3b79863a9464decea69e4defb..7e45e0434a9028cfced229a34e8a5e805c3de996 100644 --- a/src/polystar/frame_generators/fps_video_frame_generator.py +++ b/src/polystar/frame_generators/fps_video_frame_generator.py @@ -1,8 +1,6 @@ from dataclasses import dataclass from typing import Iterable -import ffmpeg - from polystar.frame_generators.video_frame_generator import VideoFrameGenerator from polystar.models.image import Image @@ -13,14 +11,9 @@ class FPSVideoFrameGenerator(VideoFrameGenerator): desired_fps: int def __post_init__(self): - self.frame_rate: int = self._get_video_fps() // self.desired_fps - - def _get_video_fps(self): - return max( - int(stream["r_frame_rate"].split("/")[0]) for stream in ffmpeg.probe(str(self.video_path))["streams"] - ) + self.frame_rate: int = self._video_fps // self.desired_fps def generate(self) -> Iterable[Image]: - for i, frame in enumerate(super().generate()): + for i, frame in enumerate(super().generate(), -1): if not i % self.frame_rate: yield frame diff --git a/src/polystar/frame_generators/video_frame_generator.py b/src/polystar/frame_generators/video_frame_generator.py index 5b74d5bff013112c2bc1fc288da10abaf22a7b28..22701b54effe603b8f43d0089924db7929793147 100644 --- a/src/polystar/frame_generators/video_frame_generator.py +++ b/src/polystar/frame_generators/video_frame_generator.py @@ -1,6 +1,10 @@ from dataclasses import dataclass from pathlib import Path -from typing import Any, Iterable +from typing import Any, Iterable, Optional + +import ffmpeg +from cv2.cv2 import CAP_PROP_POS_FRAMES +from memoized_property import memoized_property from polystar.frame_generators.cv2_frame_generator_abc import CV2FrameGeneratorABC @@ -9,6 +13,20 @@ from polystar.frame_generators.cv2_frame_generator_abc import CV2FrameGeneratorA class VideoFrameGenerator(CV2FrameGeneratorABC): video_path: Path + offset_seconds: Optional[int] def _capture_params(self) -> Iterable[Any]: return (str(self.video_path),) + + def _post_opening_operation(self): + if self.offset_seconds: + self._cap.set(CAP_PROP_POS_FRAMES, self._video_fps * self.offset_seconds - 2) + + @memoized_property + def _video_fps(self) -> int: + streams_info = ffmpeg.probe(str(self.video_path))["streams"] + for stream_info in streams_info: + if stream_info["codec_type"] != "video": + continue + return round(eval(stream_info["avg_frame_rate"])) + raise ValueError(f"No fps found for video {self.video_path.name}") diff --git a/src/polystar/utils/path.py b/src/polystar/utils/path.py new file mode 100644 index 0000000000000000000000000000000000000000..be35166b13f8908889afc944ce7808a79f84ecf0 --- /dev/null +++ b/src/polystar/utils/path.py @@ -0,0 +1,22 @@ +from pathlib import Path +from shutil import copy, make_archive, move +from typing import Iterable + + +def move_file(source: Path, destination_directory: Path): + destination_directory.mkdir(exist_ok=True, parents=True) + move(str(source), str(destination_directory)) + + +def move_files(sources: Iterable[Path], destination_directory: Path): + for source in sources: + move_file(source, destination_directory) + + +def copy_file(source: Path, destination_directory: Path): + destination_directory.mkdir(exist_ok=True, parents=True) + copy(str(source), str(destination_directory)) + + +def archive_directory(directory:Path): + make_archive(str(directory), "zip", str(directory)) diff --git a/src/research/common/constants.py b/src/research/common/constants.py index dab2f5e382f3b468942afa3a9fd5a04c710ca451..b52148a1029d0e7a4d0ce5796e3ef0b0da5dbbe3 100644 --- a/src/research/common/constants.py +++ b/src/research/common/constants.py @@ -9,14 +9,13 @@ DJI_ROCO_DSET_DIR: Path = DSET_DIR / "dji_roco" DJI_ROCO_ZOOMED_DSET_DIR: Path = DSET_DIR / "dji_roco_zoomed_v2" TENSORFLOW_RECORDS_DIR: Path = DSET_DIR / "tf_records" TWITCH_ROBOTS_VIEWS_DIR: Path = TWITCH_DSET_DIR / "robots-views" -TWITCH_DSET_ROBOTS_VIEWS_DIR: Path = TWITCH_DSET_DIR / "final-robots-views" + TWITCH_DSET_DIR.mkdir(parents=True, exist_ok=True) DJI_ROCO_DSET_DIR.mkdir(parents=True, exist_ok=True) DJI_ROCO_ZOOMED_DSET_DIR.mkdir(parents=True, exist_ok=True) TENSORFLOW_RECORDS_DIR.mkdir(parents=True, exist_ok=True) TWITCH_ROBOTS_VIEWS_DIR.mkdir(parents=True, exist_ok=True) -TWITCH_DSET_ROBOTS_VIEWS_DIR.mkdir(parents=True, exist_ok=True) EVALUATION_DIR: Path = PROJECT_DIR / "experiments" diff --git a/src/research/common/datasets/roco/roco_annotation.py b/src/research/common/datasets/roco/roco_annotation.py index c1e47c6d749a2dc7d75177921fd0de8b4ffc5167..fdcd57d55544dee7e2f773e34987c8da0278ad8a 100644 --- a/src/research/common/datasets/roco/roco_annotation.py +++ b/src/research/common/datasets/roco/roco_annotation.py @@ -8,6 +8,7 @@ import xmltodict from dicttoxml import dicttoxml from polystar.models.roco_object import Armor, ROCOObject, ROCOObjectFactory +from polystar.utils.path import move_file @dataclass @@ -45,6 +46,13 @@ class ROCOAnnotation: h=int(xml_dict["size"]["height"]), ) + def save_in_directory(self, directory: Path, name: str): + directory.mkdir(exist_ok=True, parents=True) + self.save_in_file((directory / name).with_suffix(".xml")) + + def save_in_file(self, file: Path): + file.write_text(self.to_xml()) + def to_xml(self) -> str: return parseString( dicttoxml( @@ -61,3 +69,11 @@ class ROCOAnnotation: .replace(b"<object><object>", b"<object>") .replace(b"</object></object>", b"</object>") ).toprettyxml() + + +def move_image_and_annotation(source_dataset_directory: Path, destination_dataset_directory: Path, name: str): + move_file((source_dataset_directory / "image" / name).with_suffix(".jpg"), destination_dataset_directory / "image") + move_file( + (source_dataset_directory / "image_annotation" / name).with_suffix(".xml"), + destination_dataset_directory / "image_annotation", + ) diff --git a/src/research/dataset/scripts/create_tensorflow_records.py b/src/research/dataset/scripts/create_tensorflow_records.py index 416902795842957cbcc9cd9b8d0aa3b3d72e9982..3921e81c7e2ba9020b5a2ff14127e533ae738b9c 100644 --- a/src/research/dataset/scripts/create_tensorflow_records.py +++ b/src/research/dataset/scripts/create_tensorflow_records.py @@ -10,12 +10,12 @@ def create_one_record_per_roco_dset(): if __name__ == "__main__": TensorflowRecordFactory.from_datasets(ROCODatasetsZoo.DEFAULT_TEST_DATASETS, "Twitch_Test_") - TensorflowRecordFactory.from_datasets(ROCODatasetsZoo.TWITCH_TRAIN_DATASETS, "Twitch_Train_") - TensorflowRecordFactory.from_datasets(ROCODatasetsZoo.DJI_TRAIN_DATASETS, "DJI_Train_") - TensorflowRecordFactory.from_datasets(ROCODatasetsZoo.DJI_ZOOMED_TRAIN_DATASETS, "DJIZoomedV2_Train_") - TensorflowRecordFactory.from_datasets( - ROCODatasetsZoo.TWITCH_TRAIN_DATASETS + ROCODatasetsZoo.DJI_TRAIN_DATASETS, "Twitch_DJI_Train_" - ) - TensorflowRecordFactory.from_datasets( - ROCODatasetsZoo.TWITCH_TRAIN_DATASETS + ROCODatasetsZoo.DJI_ZOOMED_TRAIN_DATASETS, "Twitch_DJIZoomedV2_Train_" - ) + # TensorflowRecordFactory.from_datasets(ROCODatasetsZoo.TWITCH_TRAIN_DATASETS, "Twitch_Train_") + # TensorflowRecordFactory.from_datasets(ROCODatasetsZoo.DJI_TRAIN_DATASETS, "DJI_Train_") + # TensorflowRecordFactory.from_datasets(ROCODatasetsZoo.DJI_ZOOMED_TRAIN_DATASETS, "DJIZoomedV2_Train_") + # TensorflowRecordFactory.from_datasets( + # ROCODatasetsZoo.TWITCH_TRAIN_DATASETS + ROCODatasetsZoo.DJI_TRAIN_DATASETS, "Twitch_DJI_Train_" + # ) + # TensorflowRecordFactory.from_datasets( + # ROCODatasetsZoo.TWITCH_TRAIN_DATASETS + ROCODatasetsZoo.DJI_ZOOMED_TRAIN_DATASETS, "Twitch_DJIZoomedV2_Train_" + # ) diff --git a/src/research/dataset/scripts/extract_robots_views_from_video.py b/src/research/dataset/scripts/extract_robots_views_from_video.py index f011324320c418bfa0f11079f2b84a7eb61df26a..f2e18fa2ebfacdfae924bc9092c1981fdca0e5d5 100644 --- a/src/research/dataset/scripts/extract_robots_views_from_video.py +++ b/src/research/dataset/scripts/extract_robots_views_from_video.py @@ -1,7 +1,4 @@ -import sys - from research.dataset.twitch.robots_views_extractor import RobotsViewExtractor if __name__ == "__main__": - for _video_name in sys.argv[1:]: - RobotsViewExtractor(_video_name).run() + RobotsViewExtractor(input("Video id: ")).run() diff --git a/src/research/dataset/scripts/make_twitch_chunks_to_annotate.py b/src/research/dataset/scripts/make_twitch_chunks_to_annotate.py index 136ab64fdafa3cdcedd7fe3f31d45b9817b87a8e..eb88e440ffa005821f64a53235b6559c146251f7 100644 --- a/src/research/dataset/scripts/make_twitch_chunks_to_annotate.py +++ b/src/research/dataset/scripts/make_twitch_chunks_to_annotate.py @@ -1,45 +1,36 @@ -import shutil from dataclasses import dataclass -from itertools import count from pathlib import Path -from typing import Iterator +from typing import List -from research.common.constants import TWITCH_ROBOTS_VIEWS_DIR +from more_itertools import ilen + +from polystar.utils.iterable_utils import chunk +from polystar.utils.path import archive_directory, move_files +from research.common.constants import TWITCH_DSET_DIR, TWITCH_ROBOTS_VIEWS_DIR @dataclass -class DatasetChunker: +class ImagesChunker: dataset_dir: Path + chunks_dir: Path chunk_size: int = 100 def make_chunks(self): - try: - image_paths_iterator = self.dataset_dir.glob("*.jpg") - for chunk_number in count(1 + self._get_number_existing_chunks()): - self._make_next_chunk(chunk_number, image_paths_iterator) - except StopIteration: - self._zip_chunk(self._get_chunk_dir(self._get_number_existing_chunks() + 1)) - - def _make_next_chunk(self, chunk_number: int, image_paths_iterator: Iterator[Path]): - chunk_dir = self._get_chunk_dir(chunk_number) - chunk_dir.mkdir() - for _ in range(self.chunk_size): - image_path = next(image_paths_iterator) - shutil.move(str(image_path), str(chunk_dir / image_path.name)) - self._zip_chunk(chunk_dir) - - def _get_chunk_dir(self, chunk_number: int): - return self.dataset_dir / f"chunk_{chunk_number:03}" + images = self.dataset_dir.glob("**/*.jpg") + for chunk_images in chunk(images, self.chunk_size): + self._make_chunk(chunk_images) - def _get_number_existing_chunks(self): - return int(str(max(self.dataset_dir.glob("chunk_*.zip"), default="chunk_000.zip"))[-7:-4]) + def _make_chunk(self, images: List[Path]): + chunk_dir = self._get_next_available_chunk() + move_files(images, chunk_dir) + archive_directory(chunk_dir) - @staticmethod - def _zip_chunk(chunk_dir: Path): - shutil.make_archive(str(chunk_dir), "zip", str(chunk_dir)) + def _get_next_available_chunk(self) -> Path: + chunk_number = ilen(self.chunks_dir.glob("chunk_*.zip")) + return self.chunks_dir / f"chunk_{chunk_number:03}" if __name__ == "__main__": - DatasetChunker(TWITCH_ROBOTS_VIEWS_DIR).make_chunks() + ImagesChunker(TWITCH_ROBOTS_VIEWS_DIR, TWITCH_DSET_DIR / "chunks-to-annotate").make_chunks() diff --git a/src/research/dataset/scripts/match_hd_with_720p.py b/src/research/dataset/scripts/match_hd_with_720p.py new file mode 100644 index 0000000000000000000000000000000000000000..ea21c9be7861052257223e60e9ce6bcc2eb458d8 --- /dev/null +++ b/src/research/dataset/scripts/match_hd_with_720p.py @@ -0,0 +1,90 @@ +import json +from os import remove +from pathlib import Path + +from polystar.models.box import Box +from polystar.models.image import load_image +from polystar.utils.path import copy_file, move_file +from research.common.constants import TWITCH_DSET_DIR, TWITCH_ROBOTS_VIEWS_DIR +from research.common.datasets.roco.roco_annotation import ROCOAnnotation +from research.common.datasets.roco.roco_dataset_builder import ROCODatasetBuilder +from research.common.datasets.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo +from research.dataset.twitch.mask_detector import has_bonus_icon, is_aerial_view + +AERIAL_DIR = TWITCH_DSET_DIR / "v2" / "aerial-views" +RUNES_DIR = TWITCH_DSET_DIR / "v2" / "runes" + + +def match_on_dataset(builder: ROCODatasetBuilder): + twitch_id = builder.main_dir.name + hd_images_directory = TWITCH_ROBOTS_VIEWS_DIR / twitch_id + + dataset_v2_directory = TWITCH_DSET_DIR / "v2" / twitch_id + + _move_images_with_720p_annotations(builder, dataset_v2_directory, hd_images_directory, twitch_id) + _copy_changes_locks(builder, dataset_v2_directory) + _move_aerials_views(hd_images_directory) + + +def _move_images_with_720p_annotations( + builder: ROCODatasetBuilder, dataset_v2_directory: Path, hd_images_directory: Path, twitch_id: str +): + dataset = builder.build_lazy() + missing_images = [] + for image_file, annotation, _ in dataset: + hd_image_file = hd_images_directory / image_file.name + if hd_image_file.exists(): + hd_image = load_image(hd_image_file) + if has_bonus_icon(hd_image): + remove(hd_image_file) + continue + elif is_aerial_view(hd_image): + directory = AERIAL_DIR + elif annotation.has_rune: + directory = RUNES_DIR + else: + directory = dataset_v2_directory + move_file(hd_image_file, directory / "image") + _scale_annotation(annotation, height=1080, width=1920) + annotation.save_in_directory(directory / "image_annotation", image_file.stem) + else: + missing_images.append(str(image_file)) + print(f"{len(missing_images)} missing images in {twitch_id}") + (hd_images_directory / "missing.json").write_text(json.dumps(missing_images)) + + +def _scale_annotation(annotation: ROCOAnnotation, height: int, width: int): + vertical_ratio, horizontal_ratio = height / annotation.h, width / annotation.w + + for obj in annotation.objects: + obj.box = Box.from_positions( + x1=int(obj.box.x1 * horizontal_ratio), + y1=int(obj.box.y1 * vertical_ratio), + x2=int(obj.box.x2 * horizontal_ratio), + y2=int(obj.box.y2 * vertical_ratio), + ) + + annotation.w, annotation.h = width, height + + +def _copy_changes_locks(builder, dataset_v2_directory): + for task in ("colors", "digits"): + changes_lock = builder.main_dir / f"{task}/.changes" + if changes_lock.exists(): + copy_file(changes_lock, dataset_v2_directory / task) + + +def _move_aerials_views(hd_images_directory): + for hd_image_file in hd_images_directory.glob("*.jpg"): + image = load_image(hd_image_file) + if has_bonus_icon(image): + remove(hd_image_file) + elif is_aerial_view(image): + move_file(hd_image_file, AERIAL_DIR / "unannotated_image") + + +if __name__ == "__main__": + for _builder in ROCODatasetsZoo.TWITCH: + match_on_dataset(_builder) + for _new_twitch_id in ("470149066", "470152932"): + _move_aerials_views(TWITCH_ROBOTS_VIEWS_DIR / _new_twitch_id) diff --git a/src/research/dataset/scripts/move_aerial_views.py b/src/research/dataset/scripts/move_aerial_views.py deleted file mode 100644 index 5b71f39bf6459e6b48d6ccadf361930ddcfe04b5..0000000000000000000000000000000000000000 --- a/src/research/dataset/scripts/move_aerial_views.py +++ /dev/null @@ -1,20 +0,0 @@ -from shutil import move - -from skimage import io -from tqdm import tqdm - -from research.common.constants import TWITCH_DSET_DIR, TWITCH_ROBOTS_VIEWS_DIR -from research.dataset.twitch.aerial_view_detector import aerial_view_detector - -AERIAL_VIEWS_DIR = TWITCH_DSET_DIR / "aerial-views" - -AERIAL_VIEWS_DIR.mkdir(parents=True, exist_ok=True) - -if __name__ == "__main__": - n = 0 - for file_path in tqdm(list(TWITCH_ROBOTS_VIEWS_DIR.glob("*.jpg")), unit="image", desc="Moving aerial views"): - if aerial_view_detector.is_matching(io.imread(str(file_path))): - move(str(file_path), str(AERIAL_VIEWS_DIR / file_path.name)) - n += 1 - - print(f"Moved {n} images") diff --git a/src/research/dataset/scripts/visualize_dataset.py b/src/research/dataset/scripts/visualize_dataset.py index 23871061b5389274106d43f2b6d0bfe73c828d1d..6dc7e5017755567bb42d4ae098257f4629d5b6f4 100644 --- a/src/research/dataset/scripts/visualize_dataset.py +++ b/src/research/dataset/scripts/visualize_dataset.py @@ -14,4 +14,5 @@ def visualize_dataset(dataset: LazyROCODataset, n_images: int): if __name__ == "__main__": - visualize_dataset(ROCODatasetsZoo.DJI_ZOOMED.CENTRAL_CHINA.to_images().build_lazy(), 20) + for builder in ROCODatasetsZoo.TWITCH: + visualize_dataset(builder.to_images().build_lazy(), 20) diff --git a/src/research/dataset/tensorflow_record.py b/src/research/dataset/tensorflow_record.py index e69196425be21adc5f56e467ca3df580cfb697b2..615b23551adc4d256db0bdeae8d66c022243967c 100644 --- a/src/research/dataset/tensorflow_record.py +++ b/src/research/dataset/tensorflow_record.py @@ -3,10 +3,13 @@ from pathlib import Path from shutil import move from typing import List -import tensorflow as tf -from tensorflow_core.python.lib.io import python_io +from tensorflow.core.example.example_pb2 import Example +from tensorflow.core.example.feature_pb2 import BytesList, Feature, Features, FloatList, Int64List +from tensorflow_core.python.lib.io.tf_record import TFRecordWriter +from polystar.filters.pass_through_filter import PassThroughFilter from polystar.models.label_map import label_map +from polystar.target_pipeline.objects_filters.objects_filter_abc import ObjectsFilterABC from polystar.utils.tqdm import smart_tqdm from research.common.constants import TENSORFLOW_RECORDS_DIR from research.common.datasets.roco.roco_annotation import ROCOAnnotation @@ -14,14 +17,19 @@ from research.common.datasets.roco.roco_dataset_builder import ROCODatasetBuilde class TensorflowRecordFactory: - @staticmethod - def from_datasets(datasets: List[ROCODatasetBuilder], prefix: str = ""): + def __init__(self, objects_filter: ObjectsFilterABC = PassThroughFilter()): + self.objects_filter = objects_filter + + def from_dataset(self, dataset: ROCODatasetBuilder, prefix: str = ""): + self.from_datasets([dataset], prefix) + + def from_datasets(self, datasets: List[ROCODatasetBuilder], prefix: str = ""): record_name = prefix + "_".join(d.name for d in datasets) - writer = python_io.TFRecordWriter(str(TENSORFLOW_RECORDS_DIR / f"{record_name}.record")) + writer = TFRecordWriter(str(TENSORFLOW_RECORDS_DIR / f"{record_name}.record")) c = 0 for dataset in smart_tqdm(datasets, desc=record_name, unit="dataset"): for image_path, annotation, _ in smart_tqdm(dataset, desc=dataset.name, unit="img", leave=False): - writer.write(_example_from_image_annotation(image_path, annotation).SerializeToString()) + writer.write(self._example_from_image_annotation(image_path, annotation).SerializeToString()) c += 1 writer.close() move( @@ -29,65 +37,61 @@ class TensorflowRecordFactory: str(TENSORFLOW_RECORDS_DIR / f"{record_name}_{c}_imgs.record"), ) - @staticmethod - def from_dataset(dataset: ROCODatasetBuilder, prefix: str = ""): - TensorflowRecordFactory.from_datasets([dataset], prefix) - - -def _example_from_image_annotation(image_path: Path, annotation: ROCOAnnotation) -> tf.train.Example: - image_name = image_path.name - encoded_jpg = image_path.read_bytes() - key = hashlib.sha256(encoded_jpg).hexdigest() - - width, height = annotation.w, annotation.h - - xmin, ymin, xmax, ymax, classes, classes_text = [], [], [], [], [], [] - - for obj in annotation.objects: - xmin.append(float(obj.box.x1) / width) - ymin.append(float(obj.box.y1) / height) - xmax.append(float(obj.box.x2) / width) - ymax.append(float(obj.box.y2) / height) - classes_text.append(obj.type.name.lower().encode("utf8")) - classes.append(label_map.id_of(obj.type.name.lower())) - - return tf.train.Example( - features=tf.train.Features( - feature={ - "image/filename": bytes_feature(image_name.encode("utf8")), - "image/source_id": bytes_feature(image_name.encode("utf8")), - "image/height": int64_feature(height), - "image/width": int64_feature(width), - "image/key/sha256": bytes_feature(key.encode("utf8")), - "image/encoded": bytes_feature(encoded_jpg), - "image/format": bytes_feature("jpeg".encode("utf8")), - "image/object/bbox/xmin": float_list_feature(xmin), - "image/object/bbox/xmax": float_list_feature(xmax), - "image/object/bbox/ymin": float_list_feature(ymin), - "image/object/bbox/ymax": float_list_feature(ymax), - "image/object/class/text": bytes_list_feature(classes_text), - "image/object/class/label": int64_list_feature(classes), - } + def _example_from_image_annotation(self, image_path: Path, annotation: ROCOAnnotation) -> Example: + image_name = image_path.name + encoded_jpg = image_path.read_bytes() + key = hashlib.sha256(encoded_jpg).hexdigest() + + width, height = annotation.w, annotation.h + + xmin, ymin, xmax, ymax, classes, classes_text = [], [], [], [], [], [] + + for obj in self.objects_filter.filter(annotation.objects): + xmin.append(float(obj.box.x1) / width) + ymin.append(float(obj.box.y1) / height) + xmax.append(float(obj.box.x2) / width) + ymax.append(float(obj.box.y2) / height) + classes_text.append(obj.type.name.lower().encode("utf8")) + classes.append(label_map.id_of(obj.type.name.lower())) + + return Example( + features=Features( + feature={ + "image/filename": bytes_feature(image_name.encode("utf8")), + "image/source_id": bytes_feature(image_name.encode("utf8")), + "image/height": int64_feature(height), + "image/width": int64_feature(width), + "image/key/sha256": bytes_feature(key.encode("utf8")), + "image/encoded": bytes_feature(encoded_jpg), + "image/format": bytes_feature("jpeg".encode("utf8")), + "image/object/bbox/xmin": float_list_feature(xmin), + "image/object/bbox/xmax": float_list_feature(xmax), + "image/object/bbox/ymin": float_list_feature(ymin), + "image/object/bbox/ymax": float_list_feature(ymax), + "image/object/class/text": bytes_list_feature(classes_text), + "image/object/class/label": int64_list_feature(classes), + } + ) ) - ) -# Functions copied from https://github.com/tensorflow/models/blob/master/research/object_detection/utils/dataset_util.py -def int64_feature(value): - return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) +# Functions inspired from +# https://github.com/tensorflow/models/blob/master/research/object_detection/utils/dataset_util.py +def int64_feature(value: int) -> Feature: + return int64_list_feature([value]) -def int64_list_feature(value): - return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) +def int64_list_feature(value: List[int]) -> Feature: + return Feature(int64_list=Int64List(value=value)) -def bytes_feature(value): - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) +def bytes_feature(value: bytes) -> Feature: + return bytes_list_feature([value]) -def bytes_list_feature(value): - return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) +def bytes_list_feature(value: List[bytes]) -> Feature: + return Feature(bytes_list=BytesList(value=value)) -def float_list_feature(value): - return tf.train.Feature(float_list=tf.train.FloatList(value=value)) +def float_list_feature(value: List[float]) -> Feature: + return Feature(float_list=FloatList(value=value)) diff --git a/src/research/dataset/twitch/README.md b/src/research/dataset/twitch/README.md index 00976f6f0216dc4c077f4a35a3dbe904253f6343..d1a360b28438a6ba2c3c31efe12a30120d587399 100644 --- a/src/research/dataset/twitch/README.md +++ b/src/research/dataset/twitch/README.md @@ -43,8 +43,8 @@ You have 2 options to download the videos: 3. Rename it using the video id on twitch, and place it in [dataset/twitch/videos](../../../../dataset/twitch/videos) 5. Launch the python script [../scripts/extract_robots_views_from_video.py](../scripts/extract_robots_views_from_video.py), with the video id as parameter (In Pycharm, `Run` > `Edit Configurations...`, then in parameters enter the id). You can put multiple video ids by separating them with spaces. -The frames will appear in the [dataset/twitch/robots-views](../../../../dataset/twitch/robots-views) folder. +The frames will appear in the [dataset/twitch/robots-views](../../../../dataset/twitch/robots-views-hd-decalees) folder. ## Aerial dataset -Once you have the robots views in the [dataset/twitch/robots-views](../../../../dataset/twitch/robots-views) folder, run the python script [../scripts/move_aerial_views.py](../scripts/move_aerial_views.py). It will put the aerial views in the [dataset/twitch/aerial-views](../../../../dataset/twitch/aerial-views) directory. +Once you have the robots views in the [dataset/twitch/robots-views](../../../../dataset/twitch/robots-views-hd-decalees) folder, run the python script [../scripts/move_aerial_views.py](../scripts/move_aerial_and_runes_views.py). It will put the aerial views in the [dataset/twitch/aerial-views](../../../../dataset/twitch/aerial-views) directory. diff --git a/src/research/dataset/twitch/aerial_view_detector.py b/src/research/dataset/twitch/aerial_view_detector.py deleted file mode 100644 index f1f0332f34b623eac8566e1c36a133c4843677dd..0000000000000000000000000000000000000000 --- a/src/research/dataset/twitch/aerial_view_detector.py +++ /dev/null @@ -1,23 +0,0 @@ -from pathlib import Path - -from skimage import io - -from research.common.constants import TWITCH_DSET_DIR -from research.dataset.twitch.mask_detector import MaskDetector - -aerial_view_detector = MaskDetector( - Path(__file__).parent / "mask_aerial.jpg", - [ - (527, 528, 292, 297, 20), - (527, 531, 303, 303, 20), - (532, 537, 286, 287, 20), - (536, 541, 302, 303, 20), - (543, 544, 292, 297, 20), - (535, 535, 292, 297, 20), - ], -) - -if __name__ == "__main__": - for file_path in sorted((TWITCH_DSET_DIR / "robots-views").glob("*.jpg")): - if aerial_view_detector.is_matching(io.imread(str(file_path))): - print(file_path.name) diff --git a/src/research/dataset/twitch/mask_aerial_blue_2_hd.jpg b/src/research/dataset/twitch/mask_aerial_blue_2_hd.jpg new file mode 100644 index 0000000000000000000000000000000000000000..27e14148e00f8bb34ade761c4314ee14813ffc62 Binary files /dev/null and b/src/research/dataset/twitch/mask_aerial_blue_2_hd.jpg differ diff --git a/src/research/dataset/twitch/mask_aerial_blue_hd.jpg b/src/research/dataset/twitch/mask_aerial_blue_hd.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b208e82ae4a622880a4af3146bb4bc65cbbfd118 Binary files /dev/null and b/src/research/dataset/twitch/mask_aerial_blue_hd.jpg differ diff --git a/src/research/dataset/twitch/mask_aerial_red_2_hd.jpg b/src/research/dataset/twitch/mask_aerial_red_2_hd.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0fedc05eb627cbbf82a084c3815930f0740f6b88 Binary files /dev/null and b/src/research/dataset/twitch/mask_aerial_red_2_hd.jpg differ diff --git a/src/research/dataset/twitch/mask_aerial_red_hd.jpg b/src/research/dataset/twitch/mask_aerial_red_hd.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fd8e3e99827af2527f68b19837673ed306e031ce Binary files /dev/null and b/src/research/dataset/twitch/mask_aerial_red_hd.jpg differ diff --git a/src/research/dataset/twitch/mask_bonus.jpg b/src/research/dataset/twitch/mask_bonus.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5f5182c80aa14f0a836d88e7c464eb8e95504397 Binary files /dev/null and b/src/research/dataset/twitch/mask_bonus.jpg differ diff --git a/src/research/dataset/twitch/mask_bonus_2.jpg b/src/research/dataset/twitch/mask_bonus_2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b9b90558a8f1026913bb4480f4fe562a24ff7a9e Binary files /dev/null and b/src/research/dataset/twitch/mask_bonus_2.jpg differ diff --git a/src/research/dataset/twitch/mask_bonus_3.jpg b/src/research/dataset/twitch/mask_bonus_3.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9c5d13959b5e6c3d7ca12463f4df1209c1c9365b Binary files /dev/null and b/src/research/dataset/twitch/mask_bonus_3.jpg differ diff --git a/src/research/dataset/twitch/mask_detector.py b/src/research/dataset/twitch/mask_detector.py index 8ffd748dc0fe4468087b6053acb8777385a3ed5d..4d63b526c8dda29406ba90f826450a5440f6b773 100644 --- a/src/research/dataset/twitch/mask_detector.py +++ b/src/research/dataset/twitch/mask_detector.py @@ -1,67 +1,51 @@ -import math from pathlib import Path -from typing import List, Tuple -import cv2 import numpy as np +from polystar.models.image import Image, load_image -class Zone: - def __init__(self, x_min, x_max, y_min, y_max, threshold, active_pixels, image_mask): +DIR_PATH = Path(__file__).parent - self.pixels = [(x, y) for x, y in active_pixels if (y_min <= y <= y_max) and (x_min <= x <= x_max)] - self.mean_r = self.get_mean(0, image_mask) - self.mean_g = self.get_mean(1, image_mask) - self.mean_b = self.get_mean(2, image_mask) - self.threshold = threshold +class Mask: + def __init__(self, mask_file: Path, threshold: float): + self._threshold = threshold + mask_image = load_image(mask_file) + self._mask_coordinates = np.where(mask_image.max(axis=-1) > 40) + self._mask_values = mask_image[self._mask_coordinates].astype(np.int16) - def get_mean(self, color, img): - return sum([img[pix[0], pix[1]][color] for pix in self.pixels]) / len(self.pixels) + def match(self, image: Image) -> bool: + value = np.abs(self._mask_values - image[self._mask_coordinates]).mean() + # print(value) + return value <= self._threshold - def get_means(self, img): - mr, mg, mb = 0, 0, 0 - for pix in self.pixels: - p = img[pix[0], pix[1]] - mr += p[0] - mg += p[1] - mb += p[2] - n_pixels = len(self.pixels) - return mr / n_pixels, mg / n_pixels, mb / n_pixels +robot_view_mask_hd = Mask(DIR_PATH / "mask_robot_view_hd.jpg", 20) +aerial_view_mask_red_hd = Mask(DIR_PATH / "mask_aerial_red_hd.jpg", 15) +aerial_view_mask_red_2_hd = Mask(DIR_PATH / "mask_aerial_red_2_hd.jpg", 15) +aerial_view_mask_blue_hd = Mask(DIR_PATH / "mask_aerial_blue_hd.jpg", 15) +aerial_view_mask_blue_2_hd = Mask(DIR_PATH / "mask_aerial_blue_2_hd.jpg", 15) +bonus_view_mask_hd = Mask(DIR_PATH / "mask_bonus.jpg", 20) +bonus_2_view_mask_hd = Mask(DIR_PATH / "mask_bonus_2.jpg", 20) +bonus_3_view_mask_hd = Mask(DIR_PATH / "mask_bonus_3.jpg", 20) - def is_matching(self, frame: np.ndarray): - mean_r, mean_g, mean_b = self.get_means(frame) - return ( - math.sqrt(pow(mean_r - self.mean_r, 2) + pow(mean_g - self.mean_g, 2) + pow(mean_b - self.mean_b, 2)) - < self.threshold - ) - - -class MaskDetector: - def __init__(self, image_path: Path, zones_params: List[Tuple[int, int, int, int, int]]): - image_mask = cv2.imread(str(image_path)) - active_px = [ - (a, b) - for a in range(0, 720) - for b in range(0, 1280) - if ( - image_mask[a, b].any() - and int(image_mask[a, b][0]) + int(image_mask[a, b][1]) + int(image_mask[a, b][2]) > 50 - ) - ] - self.zones = [Zone(*zone_params, active_px, image_mask) for zone_params in zones_params] +def is_aerial_view(image: Image) -> bool: + return ( + aerial_view_mask_red_hd.match(image) + or aerial_view_mask_red_2_hd.match(image) + or aerial_view_mask_blue_hd.match(image) + or aerial_view_mask_blue_2_hd.match(image) + ) - def is_matching(self, frame: np.ndarray): - return all(zone.is_matching(frame) for zone in self.zones) +def has_bonus_icon(image: Image) -> bool: + return bonus_view_mask_hd.match(image) or bonus_2_view_mask_hd.match(image) or bonus_3_view_mask_hd.match(image) -robot_view_detector = MaskDetector( - Path(__file__).parent / "mask_robot_view.jpg", - [(0, 2000, 20, 70, 20), (0, 2000, 270, 370, 20), (0, 2000, 510, 770, 20),], -) - -def is_image_from_robot_view(frame): - return robot_view_detector.is_matching(frame) +if __name__ == "__main__": + has_bonus_icon( + load_image( + Path("/Users/cytadel/polystar/cv-code/dataset/twitch/robots-views/470152932/470152932-frame-007460.jpg") + ) + ) diff --git a/src/research/dataset/twitch/mask_robot_view_hd.jpg b/src/research/dataset/twitch/mask_robot_view_hd.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c788ec54004dcb1ffe416f3c25a0cf972e295e2c Binary files /dev/null and b/src/research/dataset/twitch/mask_robot_view_hd.jpg differ diff --git a/src/research/dataset/twitch/resize_mask.py b/src/research/dataset/twitch/resize_mask.py new file mode 100644 index 0000000000000000000000000000000000000000..238629956b3c4f23043b34287016a14f9b7c5c57 --- /dev/null +++ b/src/research/dataset/twitch/resize_mask.py @@ -0,0 +1,7 @@ +from pathlib import Path + +from imutils import resize + +from polystar.models.image import load_image, save_image + +save_image(resize(load_image(Path("mask_aerial.jpg")), 1920, 1080), Path("mask_aerial_red_hd.jpg")) diff --git a/src/research/dataset/twitch/robots_views_extractor.py b/src/research/dataset/twitch/robots_views_extractor.py index aa2b306a3da58c6ff5c01ad2a50a29e7b608f280..aa58f27bb3b6b1b29dbf234bfa63ac623c92c40b 100644 --- a/src/research/dataset/twitch/robots_views_extractor.py +++ b/src/research/dataset/twitch/robots_views_extractor.py @@ -5,23 +5,27 @@ from tqdm import tqdm from polystar.frame_generators.fps_video_frame_generator import FPSVideoFrameGenerator from research.common.constants import TWITCH_DSET_DIR, TWITCH_ROBOTS_VIEWS_DIR -from research.dataset.twitch.mask_detector import is_image_from_robot_view +from research.dataset.twitch.mask_detector import has_bonus_icon, robot_view_mask_hd class RobotsViewExtractor: FPS = 2 + OFFSET_SECONDS = 3140 // 2 def __init__(self, video_name: str): self.video_name: str = video_name self.video_path = TWITCH_DSET_DIR / "videos" / f"{video_name}.mp4" - self.frame_generator: FPSVideoFrameGenerator = FPSVideoFrameGenerator(self.video_path, self.FPS) + self.frame_generator: FPSVideoFrameGenerator = FPSVideoFrameGenerator( + self.video_path, self.OFFSET_SECONDS, self.FPS + ) self.count = 0 (TWITCH_ROBOTS_VIEWS_DIR / self.video_name).mkdir(exist_ok=True) + self._progress_bar = None def run(self): self._progress_bar = tqdm( - enumerate(self.frame_generator.generate()), + enumerate(self.frame_generator.generate(), 1 + self.OFFSET_SECONDS * self.FPS), total=self._get_number_of_frames(), desc=f"Extracting robots views from video {self.video_name}.mp4", unit="frames", @@ -32,7 +36,7 @@ class RobotsViewExtractor: print(f"Detected {self.count} robots views") def _process_frame(self, frame: np.ndarray, frame_number: int): - if is_image_from_robot_view(frame): + if robot_view_mask_hd.match(frame) and not has_bonus_icon(frame): self._save_frame(frame, frame_number) self.count += 1 self._progress_bar.set_description( @@ -40,9 +44,7 @@ class RobotsViewExtractor: ) def _save_frame(self, frame: np.ndarray, frame_number: int): - cv2.imwrite( - f"{TWITCH_ROBOTS_VIEWS_DIR}/{self.video_name}/{self.video_name}-frame-{frame_number + 1:06}.jpg", frame - ) + cv2.imwrite(f"{TWITCH_ROBOTS_VIEWS_DIR}/{self.video_name}/{self.video_name}-frame-{frame_number:06}.jpg", frame) def _get_number_of_frames(self): return int(ffmpeg.probe(str(self.video_path))["format"]["duration"].split(".")[0]) * self.FPS diff --git a/src/research/tmp.py b/src/research/tmp.py deleted file mode 100644 index b30695e9cbcc3862e45409624640cf572662785f..0000000000000000000000000000000000000000 --- a/src/research/tmp.py +++ /dev/null @@ -1,3 +0,0 @@ -from research.common.constants import DSET_DIR - -print(DSET_DIR)