diff --git a/common/polystar/common/models/image.py b/common/polystar/common/models/image.py
index 880f86e86bd0b5133f80844665a5d05dcc2f2944..1bf4fc61f065337992ef42e9f181f79729679b7c 100644
--- a/common/polystar/common/models/image.py
+++ b/common/polystar/common/models/image.py
@@ -1,12 +1,10 @@
-from __future__ import annotations
-
 from pathlib import Path
 
+import cv2
 from nptyping import Array
-from skimage import io
 
 
 class Image(Array[int, ..., ..., 3]):
     @staticmethod
-    def from_path(image_path: Path) -> Image:
-        return io.imread(str(image_path))
+    def from_path(image_path: Path) -> "Image":
+        return cv2.cvtColor(cv2.imread(str(image_path)), cv2.COLOR_BGR2RGB)
diff --git a/common/polystar/common/models/image_annotation.py b/common/polystar/common/models/image_annotation.py
index 4549d064369d775eba488ea85b70c76a43737966..1d202d446e44fbf2d9f45b3cd430afd228e88e09 100644
--- a/common/polystar/common/models/image_annotation.py
+++ b/common/polystar/common/models/image_annotation.py
@@ -1,5 +1,3 @@
-from __future__ import annotations
-
 import logging
 from dataclasses import dataclass, field
 from pathlib import Path
@@ -35,7 +33,7 @@ class ImageAnnotation:
         return self._image
 
     @staticmethod
-    def from_xml_file(xml_file: Path) -> ImageAnnotation:
+    def from_xml_file(xml_file: Path) -> "ImageAnnotation":
         try:
             annotation = xmltodict.parse(xml_file.read_text())["annotation"]
 
diff --git a/common/polystar/common/models/trt_model.py b/common/polystar/common/models/trt_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..676e06af75a83aad7058d873874c848a73cc743f
--- /dev/null
+++ b/common/polystar/common/models/trt_model.py
@@ -0,0 +1,83 @@
+import ctypes
+from pathlib import Path
+from typing import Tuple
+
+import cv2
+import numpy as np
+import pycuda.driver as cuda
+
+import tensorrt as trt
+
+from polystar.common.constants import RESOURCES_DIR
+from polystar.common.models.image import Image
+
+
+class TRTModel:
+    def __init__(self, trt_model_path: Path, input_size: Tuple[int, int]):
+        self.input_size = input_size
+
+        self.trt_logger = trt.Logger(trt.Logger.INFO)
+        self._load_plugins()
+        self.engine = self._load_engine(trt_model_path)
+
+        self.host_inputs = []
+        self.cuda_inputs = []
+        self.host_outputs = []
+        self.cuda_outputs = []
+        self.bindings = []
+        self.stream = cuda.Stream()
+        self.context = self._create_context()
+
+    def __call__(self, img: Image) -> np.ndarray:
+        img_resized = self._preprocess_image(img)
+        np.copyto(self.host_inputs[0], img_resized.ravel())
+
+        cuda.memcpy_htod_async(self.cuda_inputs[0], self.host_inputs[0], self.stream)
+        self.context.execute_async(batch_size=1, bindings=self.bindings, stream_handle=self.stream.handle)
+        cuda.memcpy_dtoh_async(self.host_outputs[1], self.cuda_outputs[1], self.stream)
+        cuda.memcpy_dtoh_async(self.host_outputs[0], self.cuda_outputs[0], self.stream)
+        self.stream.synchronize()
+
+        return self.host_outputs[0].reshape((-1, 7))
+
+    # Processing
+
+    def _preprocess_image(self, img: Image) -> Image:
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+        img = cv2.resize(img, self.input_size)
+        img = img.transpose((2, 0, 1)).astype(np.float32)
+        img = (2.0/255.0) * img - 1.0
+        return img
+
+    # Initialization
+
+    def _load_plugins(self):
+        if trt.__version__[0] < "7":
+            ctypes.CDLL(str(RESOURCES_DIR / "nano/libflattenconcat.so"))
+        trt.init_libnvinfer_plugins(self.trt_logger, "")
+
+    def _load_engine(self, trt_model_path: Path):
+        with trt.Runtime(self.trt_logger) as runtime:
+            return runtime.deserialize_cuda_engine(trt_model_path.read_bytes())
+
+    def _create_context(self):
+        for binding in self.engine:
+            size = trt.volume(self.engine.get_binding_shape(binding)) * self.engine.max_batch_size
+            host_mem = cuda.pagelocked_empty(size, np.float32)
+            cuda_mem = cuda.mem_alloc(host_mem.nbytes)
+            self.bindings.append(int(cuda_mem))
+            if self.engine.binding_is_input(binding):
+                self.host_inputs.append(host_mem)
+                self.cuda_inputs.append(cuda_mem)
+            else:
+                self.host_outputs.append(host_mem)
+                self.cuda_outputs.append(cuda_mem)
+        return self.engine.create_execution_context()
+
+    # Delete
+
+    def __del__(self):
+        """Free CUDA memories."""
+        del self.stream
+        del self.cuda_outputs
+        del self.cuda_inputs
diff --git a/common/polystar/common/pipeline/objects_detectors/objects_detector_abc.py b/common/polystar/common/pipeline/objects_detectors/objects_detector_abc.py
index 911c572e4acdd2c76385bae3dbdaa9970dee1698..7ca9e730897fd858bd6c24137882416abd56d388 100644
--- a/common/polystar/common/pipeline/objects_detectors/objects_detector_abc.py
+++ b/common/polystar/common/pipeline/objects_detectors/objects_detector_abc.py
@@ -1,5 +1,3 @@
-from __future__ import annotations
-
 from abc import ABC, abstractmethod
 from typing import List
 
diff --git a/common/polystar/common/pipeline/objects_detectors/trt_model_object_detector.py b/common/polystar/common/pipeline/objects_detectors/trt_model_object_detector.py
new file mode 100644
index 0000000000000000000000000000000000000000..29d73ea2688f3d40de21735f9442d42331c23269
--- /dev/null
+++ b/common/polystar/common/pipeline/objects_detectors/trt_model_object_detector.py
@@ -0,0 +1,58 @@
+from enum import Enum
+from typing import List
+
+import numpy as np
+
+from polystar.common.models.image import Image
+from polystar.common.models.object import Object, ObjectType
+from polystar.common.models.trt_model import TRTModel
+from polystar.common.pipeline.objects_detectors.objects_detector_abc import ObjectsDetectorABC
+from polystar.common.models.label_map import LabelMap
+
+
+class TRTModelObjectsDetector(ObjectsDetectorABC):
+    def __init__(self, trt_model: TRTModel, label_map: LabelMap):
+        self.label_map = label_map
+        self.trt_model = trt_model
+
+    def detect(self, image: Image) -> List[Object]:
+        results = self.trt_model(image)
+        return self._construct_objects_from_trt_results(results, image)
+
+    def _construct_object_from_trt_result(self, result: List[float], image_height: int, image_width: int):
+        xmin = TRTResultGetters.X_MIN.get_value(result)
+        xmax = TRTResultGetters.X_MAX.get_value(result)
+        ymin = TRTResultGetters.Y_MIN.get_value(result)
+        ymax = TRTResultGetters.Y_MAX.get_value(result)
+        return Object(
+            type=ObjectType(self.label_map.name_of(TRTResultGetters.CLS.get_value(result))),
+            confidence=TRTResultGetters.CONF.get_value(result),
+            x=int(xmin * image_width),
+            y=int(ymin * image_height),
+            w=int((xmax - xmin) * image_width),
+            h=int((ymax - ymin) * image_height),
+        )
+
+    def _construct_objects_from_trt_results(self, results: np.ndarray, image: Image):
+        image_height, image_width, *_ = image.shape
+        return [
+            self._construct_object_from_trt_result(result, image_height, image_width)
+            for result in results
+            if TRTResultGetters.CLS.get_value(result) >= 0
+        ]
+
+
+class TRTResultGetters(Enum):
+    CLS = (1, int)
+    CONF = (2, float)
+    X_MIN = (3, float)
+    X_MAX = (5, float)
+    Y_MIN = (4, float)
+    Y_MAX = (6, float)
+
+    def __init__(self, offset: int, type_: type):
+        self.type_ = type_
+        self.offset = offset
+
+    def get_value(self, result: List[float]):
+        return self.type_(result[self.offset])
diff --git a/common/polystar/common/utils/markdown.py b/common/polystar/common/utils/markdown.py
index 039ba1261e53f8aa37003a790769847c7feb5988..de2cdb1386aa384b34ee288f57c6870088d1d934 100644
--- a/common/polystar/common/utils/markdown.py
+++ b/common/polystar/common/utils/markdown.py
@@ -1,5 +1,3 @@
-from __future__ import annotations
-
 from pathlib import Path
 from typing import TextIO, List
 
@@ -18,25 +16,25 @@ class MarkdownFile:
     def __exit__(self, exc_type, exc_val, exc_tb):
         self.file.close()
 
-    def title(self, text: str, level: int = 1) -> MarkdownFile:
+    def title(self, text: str, level: int = 1) -> "MarkdownFile":
         self.file.write(f'{"#"*level} {text}\n\n')
         return self
 
-    def paragraph(self, text: str) -> MarkdownFile:
+    def paragraph(self, text: str) -> "MarkdownFile":
         self.file.write(f"{text}\n\n")
         return self
 
-    def list(self, texts: List[str]) -> MarkdownFile:
+    def list(self, texts: List[str]) -> "MarkdownFile":
         for text in texts:
             self.file.write(f" - {text}\n")
         self.file.write("\n")
         return self
 
-    def image(self, relative_path: str, alt: str = "img") -> MarkdownFile:
+    def image(self, relative_path: str, alt: str = "img") -> "MarkdownFile":
         self.paragraph(f"![{alt}]({relative_path})")
         return self
 
-    def table(self, data: DataFrame) -> MarkdownFile:
+    def table(self, data: DataFrame) -> "MarkdownFile":
         self.file.write(tabulate(data, tablefmt="pipe", headers="keys"))
         self.file.write("\n\n")
         return self
diff --git a/common/polystar/common/utils/tensorflow.py b/common/polystar/common/utils/tensorflow.py
index 3974c6558bf8cfdd93e31dc463ef4c79f81399a6..1b5269527c6b45b4e0f2bb7f5d33da1e58bf3b90 100644
--- a/common/polystar/common/utils/tensorflow.py
+++ b/common/polystar/common/utils/tensorflow.py
@@ -1,11 +1,11 @@
 import tensorflow as tf
 
-from object_detection.utils import ops as utils_ops
+# from object_detection.utils import ops as utils_ops
 
 
 def patch_tf_v2():
-    # patch tf1 into `utils.ops`
-    utils_ops.tf = tf.compat.v1
+    # # patch tf1 into `utils.ops`
+    # utils_ops.tf = tf.compat.v1
 
     # Patch the location of gfile
     tf.gfile = tf.io.gfile
diff --git a/common/polystar/common/view/bend_object_on_image.py b/common/polystar/common/view/bend_object_on_image.py
new file mode 100644
index 0000000000000000000000000000000000000000..b91ce7206d7e4f2e6514bccf60ee66f23b9f2690
--- /dev/null
+++ b/common/polystar/common/view/bend_object_on_image.py
@@ -0,0 +1,58 @@
+import cv2
+from typing import Tuple
+
+import numpy as np
+
+from polystar.common.models.image import Image
+from polystar.common.models.object import Object
+
+_COLORS = [
+    [31, 119, 180],
+    [255, 127, 14],
+    [44, 160, 44],
+    [214, 39, 40],
+    [148, 103, 189],
+    [140, 86, 75],
+    [227, 119, 194],
+    [127, 127, 127],
+    [188, 189, 34],
+    [23, 190, 207],
+]  # seaborn.color_palette() * 255
+
+ALPHA = 0.5
+FONT = cv2.FONT_HERSHEY_PLAIN
+TEXT_SCALE = 1.0
+TEXT_THICKNESS = 1
+BLACK = (0, 0, 0)
+WHITE = (255, 255, 255)
+
+
+def bend_boxed_text_on_image(img: Image, text: str, topleft: Tuple[int, int], color: Tuple[int, int, int]):
+    assert img.dtype == np.uint8
+    img_h, img_w, _ = img.shape
+    if topleft[0] >= img_w or topleft[1] >= img_h:
+        return img
+    margin = 3
+    size = cv2.getTextSize(text, FONT, TEXT_SCALE, TEXT_THICKNESS)
+    w = size[0][0] + margin * 2
+    h = size[0][1] + margin * 2
+    # the patch is used to draw boxed text
+    patch = np.zeros((h, w, 3), dtype=np.uint8)
+    patch[...] = color
+    cv2.putText(patch, text, (margin+1, h-margin-2), FONT, TEXT_SCALE,
+                WHITE, thickness=TEXT_THICKNESS, lineType=cv2.LINE_8)
+    cv2.rectangle(patch, (0, 0), (w-1, h-1), BLACK, thickness=1)
+    w = min(w, img_w - topleft[0])  # clip overlay at image boundary
+    h = min(h, img_h - topleft[1])
+    # Overlay the boxed text onto region of interest (roi) in img
+    roi = img[topleft[1]:topleft[1]+h, topleft[0]:topleft[0]+w, :]
+    cv2.addWeighted(patch[0:h, 0:w, :], ALPHA, roi, 1 - ALPHA, 0, roi)
+    return img
+
+
+def bend_object_on_image(image: Image, obj: Object):
+    color = _COLORS[obj.type.value]
+    cv2.rectangle(image, (obj.x, obj.y), (obj.x + obj.w, obj.y + obj.h), color, 2)
+
+    bend_boxed_text_on_image(image, f"{obj.type.name} ({obj.confidence:.1%})", (obj.x, obj.y), _COLORS[obj.type.value])
+    return image
diff --git a/common/polystar/common/view/display_image_annotation.py b/common/polystar/common/view/plt_display_image_with_annotation.py
similarity index 100%
rename from common/polystar/common/view/display_image_annotation.py
rename to common/polystar/common/view/plt_display_image_with_annotation.py
diff --git a/common/polystar/common/view/display_object_on_image.py b/common/polystar/common/view/plt_display_image_with_object.py
similarity index 66%
rename from common/polystar/common/view/display_object_on_image.py
rename to common/polystar/common/view/plt_display_image_with_object.py
index 86ab95af1337141670ba9c59ccf1e5e7788b1e07..075bc85d9812b3c00be55e8a660e39869434cad6 100644
--- a/common/polystar/common/view/display_object_on_image.py
+++ b/common/polystar/common/view/plt_display_image_with_object.py
@@ -1,6 +1,6 @@
 from polystar.common.models.image import Image
 from polystar.common.models.object import Object
-from polystar.common.view.display_image_annotation import display_image_with_objects
+from polystar.common.view.plt_display_image_with_annotation import display_image_with_objects
 
 
 def display_object(image: Image, obj: Object):
diff --git a/common/research_common/dataset/roco_dataset_descriptor.py b/common/research_common/dataset/roco_dataset_descriptor.py
index 5c31c33763e8b723a218ac0024f08498952a8cf9..744177c0a145dce1708cc722b93a9ff172be959b 100644
--- a/common/research_common/dataset/roco_dataset_descriptor.py
+++ b/common/research_common/dataset/roco_dataset_descriptor.py
@@ -1,13 +1,10 @@
-from __future__ import annotations
-
-from collections import defaultdict
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Dict
 
 from pandas import DataFrame
 
-from polystar.common.models.object import ObjectType, ArmorColor, ArmorNumber
+from polystar.common.models.object import ObjectType
 from polystar.common.utils.markdown import MarkdownFile
 from research_common.dataset.dji.dji_roco_datasets import DJIROCODataset
 from research_common.dataset.roco_dataset import ROCODataset
@@ -28,7 +25,7 @@ class ROCODatasetStats:
     armors_color2num2count: Dict[str, Dict[int, int]] = field(default_factory=dict)
 
     @staticmethod
-    def from_dataset(dataset: ROCODataset) -> ROCODatasetStats:
+    def from_dataset(dataset: ROCODataset) -> "ROCODatasetStats":
         rv = ROCODatasetStats()
         colors = ["red", "grey", "blue", "total"]
         rv.armors_color2num2count = {c: {n: 0 for n in range(10)} for c in colors}
diff --git a/resources/nano/libflattenconcat.so.5 b/resources/nano/libflattenconcat.so.5
new file mode 100755
index 0000000000000000000000000000000000000000..1bbb2f5300dd1cc7319337c2ac6da2793d7cc340
Binary files /dev/null and b/resources/nano/libflattenconcat.so.5 differ
diff --git a/resources/nano/libflattenconcat.so.6 b/resources/nano/libflattenconcat.so.6
new file mode 100755
index 0000000000000000000000000000000000000000..189ec1bec68c76b2639ce2aa5ab2509953fd5888
Binary files /dev/null and b/resources/nano/libflattenconcat.so.6 differ
diff --git a/robots-at-robots/config/settings.toml b/robots-at-robots/config/settings.toml
index df0e122043f38c9f2a0f09e18a93af917e2701a5..fe532b09ae715c1a30b4413411f26bbc6bf32145 100644
--- a/robots-at-robots/config/settings.toml
+++ b/robots-at-robots/config/settings.toml
@@ -1,5 +1,5 @@
 [default]
-MODEL_NAME = 'robots/ssd_mobilenet_v2_coco_2018_03_29'
+MODEL_NAME = 'robots/TRT_ssd_mobilenet_v2_roco.bin'
 
 [development]
 
diff --git a/robots-at-robots/research/demos/demo_infer.py b/robots-at-robots/research/demos/demo_infer.py
index 855308641f38d8252906f39a27eeecbbf8db1e85..4e0a7f237d67847d916a9318e520897fdea3fe2c 100644
--- a/robots-at-robots/research/demos/demo_infer.py
+++ b/robots-at-robots/research/demos/demo_infer.py
@@ -2,7 +2,7 @@ from polystar.common.models.label_map import LabelMap
 from polystar.common.pipeline.objects_detectors.tf_model_objects_detector import TFModelObjectsDetector
 from polystar.common.pipeline.objects_validators.confidence_object_validator import ConfidenceObjectValidator
 from polystar.common.utils.tensorflow import patch_tf_v2
-from polystar.common.view.display_image_annotation import display_image_with_objects
+from polystar.common.view.plt_display_image_with_annotation import display_image_with_objects
 from polystar.robots_at_robots.dependency_injection import make_injector
 from research.demos.utils import load_tf_model
 from research_common.dataset.dji.dji_roco_datasets import DJIROCODataset
diff --git a/robots-at-robots/research/demos/demo_pipeline.py b/robots-at-robots/research/demos/demo_pipeline.py
index c3ef4696f804475c274f8347f04dec9d8456833b..047aaf2a7d0c120fb5f6dd9e81ffb3636e43a918 100644
--- a/robots-at-robots/research/demos/demo_pipeline.py
+++ b/robots-at-robots/research/demos/demo_pipeline.py
@@ -10,7 +10,7 @@ from polystar.common.pipeline.objects_validators.type_object_validator import Ty
 from polystar.common.pipeline.pipeline import Pipeline
 from polystar.common.pipeline.target_factories.ratio_simple_target_factory import RatioSimpleTargetFactory
 from polystar.common.utils.tensorflow import patch_tf_v2
-from polystar.common.view.display_object_on_image import display_object
+from polystar.common.view.plt_display_image_with_object import display_object
 from polystar.robots_at_robots.dependency_injection import make_injector
 from research.demos.utils import load_tf_model
 from research_common.dataset.dji.dji_roco_datasets import DJIROCODataset
diff --git a/robots-at-robots/research/demos/demo_pipeline_camera.py b/robots-at-robots/research/demos/demo_pipeline_camera.py
index bde8b72e881b606af5d933d9e0749c904d146ee5..94b8bc8417664e606080264e1ab2928241c9df76 100644
--- a/robots-at-robots/research/demos/demo_pipeline_camera.py
+++ b/robots-at-robots/research/demos/demo_pipeline_camera.py
@@ -1,37 +1,93 @@
+import subprocess
+import sys
+
 import cv2
+from time import time
+
+import pycuda.autoinit  # This is needed for initializing CUDA driver
 
-from polystar.common.models.camera import Camera
-from polystar.common.models.object import ObjectType
-from polystar.common.pipeline.object_selectors.closest_object_selector import ClosestObjectSelector
-from polystar.common.pipeline.objects_detectors.tf_model_objects_detector import TFModelObjectsDetector
+from polystar.common.constants import MODELS_DIR
+from polystar.common.models.label_map import LabelMap
+from polystar.common.models.trt_model import TRTModel
+from polystar.common.pipeline.objects_detectors.trt_model_object_detector import TRTModelObjectsDetector
 from polystar.common.pipeline.objects_validators.confidence_object_validator import ConfidenceObjectValidator
-from polystar.common.pipeline.objects_validators.type_object_validator import TypeObjectValidator
-from polystar.common.pipeline.pipeline import Pipeline
-from polystar.common.pipeline.target_factories.ratio_simple_target_factory import RatioSimpleTargetFactory
 from polystar.common.utils.tensorflow import patch_tf_v2
+from polystar.common.view.bend_object_on_image import bend_object_on_image, bend_boxed_text_on_image
 from polystar.robots_at_robots.dependency_injection import make_injector
-from research_common.dataset.dji.dji_roco_datasets import DJIROCODataset
-from research_common.dataset.split import Split
-from research_common.dataset.split_dataset import SplitDataset
+from polystar.robots_at_robots.globals import settings
+
+WINDOWS_NAME = "TensorRT demo"
+
+
+[pycuda.autoinit]  # So pycharm won't remove the import
+
+
+def open_cam_onboard(width, height):
+    """Open the Jetson onboard camera."""
+    gst_elements = str(subprocess.check_output("gst-inspect-1.0"))
+    if "nvcamerasrc" in gst_elements:
+        # On versions of L4T prior to 28.1, you might need to add
+        # 'flip-method=2' into gst_str below.
+        gst_str = (
+            "nvcamerasrc ! "
+            "video/x-raw(memory:NVMM), "
+            "width=(int)2592, height=(int)1458, "
+            "format=(string)I420, framerate=(fraction)30/1 ! "
+            "nvvidconv ! "
+            "video/x-raw, width=(int){}, height=(int){}, "
+            "format=(string)BGRx ! "
+            "videoconvert ! appsink"
+        ).format(width, height)
+    elif "nvarguscamerasrc" in gst_elements:
+        gst_str = (
+            "nvarguscamerasrc ! "
+            "video/x-raw(memory:NVMM), "
+            f"width=(int){width}, height=(int){height}, "
+            "format=(string)NV12, framerate=(fraction)60/1 ! "
+            "nvvidconv flip-method=0 ! "
+            f"video/x-raw, width=(int){width}, height=(int){height}, "
+            "format=(string)BGRx ! "
appsink" + ) + else: + raise RuntimeError("onboard camera source not found!") + return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER) + if __name__ == "__main__": patch_tf_v2() injector = make_injector() - # objects_detector = injector.get(TFModelObjectsDetector) - # filters = [ConfidenceObjectValidator(confidence_threshold=0.5)] + objects_detector = TRTModelObjectsDetector( + TRTModel(MODELS_DIR / settings.MODEL_NAME, (300, 300)), injector.get(LabelMap) + ) + filters = [ConfidenceObjectValidator(confidence_threshold=0.5)] + + cap = open_cam_onboard(1_280, 720) + + if not cap.isOpened(): + sys.exit("Failed to open camera!") - cap = cv2.VideoCapture(0) + fps = 0 + try: + while True: + previous_time = time() + ret, image = cap.read() + objects = objects_detector.detect(image) + for f in filters: + objects = f.filter(objects, image) - for i, image_path in enumerate(SplitDataset(DJIROCODataset.CentralChina, Split.Test).image_paths): - ret, image = cap.read() - # objects = objects_detector.detect(image) + fps = .9 * fps + .1 / (time() - previous_time) + bend_boxed_text_on_image(image, f'FPS: {fps:.1f}', (10, 10), (0, 0, 0)) - # Display the resulting frame - cv2.imshow("frame", image) - if cv2.waitKey(1) & 0xFF == ord("q"): - break + for obj in objects: + bend_object_on_image(image, obj) - # When everything done, release the capture - cap.release() - cv2.destroyAllWindows() + # Display the resulting frame + cv2.imshow("frame", image) + if cv2.waitKey(1) & 0xFF == ord("q"): + break + finally: + # When everything done, release the capture + cap.release() + cv2.destroyAllWindows()