From 184c7419b611a3e140fde343eccb30873f0e3650 Mon Sep 17 00:00:00 2001 From: Mathieu Beligon <mathieu@feedly.com> Date: Tue, 31 Mar 2020 22:01:18 -0400 Subject: [PATCH] [common] (image pipeline evaluation) Add mistake examples in the report --- .../image_dataset_generator.py | 16 ++++--- .../image_pipeline_evaluation_reporter.py | 36 +++++++++++++--- .../image_pipeline_evaluator.py | 43 ++++++++++++------- .../dataset/armor_image_dataset_factory.py | 6 +-- 4 files changed, 69 insertions(+), 32 deletions(-) diff --git a/common/research_common/image_pipeline_evaluation/image_dataset_generator.py b/common/research_common/image_pipeline_evaluation/image_dataset_generator.py index b2713f6..c284877 100644 --- a/common/research_common/image_pipeline_evaluation/image_dataset_generator.py +++ b/common/research_common/image_pipeline_evaluation/image_dataset_generator.py @@ -1,4 +1,5 @@ from abc import abstractmethod +from pathlib import Path from typing import TypeVar, Generic, Tuple, List, Iterable from polystar.common.models.image import Image @@ -8,16 +9,19 @@ T = TypeVar("T") class ImageDatasetGenerator(Generic[T]): - def from_roco_datasets(self, datasets: Iterable[DirectoryROCODataset]) -> Tuple[List[Image], List[T], List[int]]: - images, labels, dataset_sizes = [], [], [] + def from_roco_datasets( + self, datasets: Iterable[DirectoryROCODataset] + ) -> Tuple[List[Path], List[Image], List[T], List[int]]: + images_path, images, labels, dataset_sizes = [], [], [], [] for dataset in datasets: prev_total_size = len(images) - for img, label in self.from_roco_dataset(dataset): - images.append(img) + for img_path, label in self.from_roco_dataset(dataset): + images_path.append(img_path) + images.append(Image.from_path(img_path)) labels.append(label) dataset_sizes.append(len(images) - prev_total_size) - return images, labels, dataset_sizes + return images_path, images, labels, dataset_sizes @abstractmethod - def from_roco_dataset(self, dataset: DirectoryROCODataset) -> 
Iterable[Tuple[Image, T]]: + def from_roco_dataset(self, dataset: DirectoryROCODataset) -> Iterable[Tuple[Path, T]]: pass diff --git a/common/research_common/image_pipeline_evaluation/image_pipeline_evaluation_reporter.py b/common/research_common/image_pipeline_evaluation/image_pipeline_evaluation_reporter.py index c8e3e0d..336a970 100644 --- a/common/research_common/image_pipeline_evaluation/image_pipeline_evaluation_reporter.py +++ b/common/research_common/image_pipeline_evaluation/image_pipeline_evaluation_reporter.py @@ -1,5 +1,7 @@ from collections import Counter from dataclasses import dataclass +from os.path import relpath +from pathlib import Path from typing import Iterable, List, Any, Dict, Tuple import numpy as np @@ -9,7 +11,7 @@ from polystar.common.image_pipeline.image_pipeline import ImagePipeline from polystar.common.utils.dataframe import format_df_rows, format_df_row, format_df_column from polystar.common.utils.markdown import MarkdownFile from polystar.common.utils.time import create_time_id -from research_common.constants import EVALUATION_DIR +from research_common.constants import EVALUATION_DIR, DSET_DIR from research_common.dataset.roco_dataset import ROCODataset from research_common.image_pipeline_evaluation.image_pipeline_evaluator import ( ImagePipelineEvaluator, @@ -78,27 +80,47 @@ class ImagePipelineEvaluationReporter: for pipeline_name, results in pipeline2results.items(): self._report_pipeline_results(mf, pipeline_name, results) - @staticmethod - def _report_pipeline_results(mf: MarkdownFile, pipeline_name: str, results: ClassificationResults): + def _report_pipeline_results(self, mf: MarkdownFile, pipeline_name: str, results: ClassificationResults): mf.title(pipeline_name, level=2) mf.paragraph(results.full_pipeline_name) mf.title("Train results", level=3) - ImagePipelineEvaluationReporter._report_pipeline_set_results(mf, results.train_results) + ImagePipelineEvaluationReporter._report_pipeline_set_results( + mf, 
results.train_results, self.evaluator.train_images_paths + ) mf.title("Test results", level=3) - ImagePipelineEvaluationReporter._report_pipeline_set_results(mf, results.test_results) + ImagePipelineEvaluationReporter._report_pipeline_set_results( + mf, results.test_results, self.evaluator.test_images_paths + ) @staticmethod - def _report_pipeline_set_results(mf: MarkdownFile, results: SetClassificationResults): + def _report_pipeline_set_results(mf: MarkdownFile, results: SetClassificationResults, image_paths: List[Path]): + mf.title("Metrics", level=4) mf.paragraph(f"Inference time: {results.mean_inference_time: .2e} s/img") df = DataFrame(results.report) format_df_rows(df, ["precision", "recall", "f1-score"], "{:.1%}") format_df_row(df, "support", int) mf.table(df) - mf.paragraph("Confusion Matrix:") + mf.title("Confusion Matrix:", level=4) mf.table(DataFrame(results.confusion_matrix)) + mf.title("10 Mistakes examples", level=4) + mistakes_idx = np.random.choice(results.mistakes, min(len(results.mistakes), 10), replace=False) + relative_paths = [ + f"![img]({relpath(str(image_paths[idx]), str(mf.markdown_path.parent))})" for idx in mistakes_idx + ] + images_names = [image_paths[idx].relative_to(DSET_DIR) for idx in mistakes_idx] + mf.table( + DataFrame( + { + "images": relative_paths, + "labels": results.labels[mistakes_idx], + "predictions": results.predictions[mistakes_idx], + "image names": images_names, + } + ).set_index("images") + ) def _aggregate_results(self, pipeline2results: Dict[str, ClassificationResults]) -> DataFrame: main_metric_name = f"{self.main_metric[0]} {self.main_metric[1]}" diff --git a/common/research_common/image_pipeline_evaluation/image_pipeline_evaluator.py b/common/research_common/image_pipeline_evaluation/image_pipeline_evaluator.py index 021508c..24fdfa6 100644 --- a/common/research_common/image_pipeline_evaluation/image_pipeline_evaluator.py +++ b/common/research_common/image_pipeline_evaluation/image_pipeline_evaluator.py @@ -3,6 +3,7 @@ from dataclasses import 
dataclass from time import time from typing import List, Dict, Any, Iterable, Sequence +import numpy as np from sklearn.metrics import classification_report, confusion_matrix from polystar.common.image_pipeline.image_pipeline import ImagePipeline @@ -13,17 +14,21 @@ from research_common.image_pipeline_evaluation.image_dataset_generator import Im @dataclass class SetClassificationResults: - report: Dict - confusion_matrix: Dict + labels: np.ndarray + predictions: np.ndarray mean_inference_time: float - @classmethod - def from_labels_and_time(cls, labels: Sequence[Any], preds: Sequence[Any], mean_time: float): - return cls( - report=classification_report(labels, preds, output_dict=True), - confusion_matrix=confusion_matrix(labels, preds), - mean_inference_time=mean_time, - ) + @property + def report(self) -> Dict: + return classification_report(self.labels, self.predictions, output_dict=True) + + @property + def confusion_matrix(self) -> Dict: + return confusion_matrix(self.labels, self.predictions) + + @property + def mistakes(self) -> Sequence[int]: + return np.where(self.labels != self.predictions)[0] @dataclass @@ -43,12 +48,18 @@ class ImagePipelineEvaluator: logging.info("Loading data") self.train_roco_datasets = train_roco_datasets self.test_roco_datasets = test_roco_datasets - self.train_images, self.train_labels, self.train_dataset_sizes = image_dataset_generator.from_roco_datasets( - train_roco_datasets - ) - self.test_images, self.test_labels, self.test_dataset_sizes = image_dataset_generator.from_roco_datasets( - test_roco_datasets - ) + ( + self.train_images_paths, + self.train_images, + self.train_labels, + self.train_dataset_sizes, + ) = image_dataset_generator.from_roco_datasets(train_roco_datasets) + ( + self.test_images_paths, + self.test_images, + self.test_labels, + self.test_dataset_sizes, + ) = image_dataset_generator.from_roco_datasets(test_roco_datasets) def evaluate_pipelines(self, pipelines: Iterable[ImagePipeline]) -> Dict[str, 
ClassificationResults]: return {str(pipeline): self.evaluate(pipeline) for pipeline in pipelines} @@ -70,4 +81,4 @@ class ImagePipelineEvaluator: t = time() preds = pipeline.predict(images) mean_time = (time() - t) / len(images) - return SetClassificationResults.from_labels_and_time(labels, preds, mean_time) + return SetClassificationResults(np.asarray(labels), np.asarray(preds), mean_time) diff --git a/robots-at-robots/research/dataset/armor_image_dataset_factory.py b/robots-at-robots/research/dataset/armor_image_dataset_factory.py index b211c63..dfdf35b 100644 --- a/robots-at-robots/research/dataset/armor_image_dataset_factory.py +++ b/robots-at-robots/research/dataset/armor_image_dataset_factory.py @@ -21,7 +21,7 @@ class ArmorImageDatasetGenerator(ImageDatasetGenerator[T]): def from_roco_dataset(self, dataset: DirectoryROCODataset) -> Iterable[Tuple[Image, T]]: if not (dataset.dataset_path / self.task_name / ".lock").exists(): self._create_labelized_armor_images_from_roco(dataset) - return self._get_saved_images_and_labels(dataset) + return self._get_images_paths_and_labels(dataset) def _create_labelized_armor_images_from_roco(self, dataset): dset_path = dataset.dataset_path / self.task_name @@ -33,9 +33,9 @@ class ArmorImageDatasetGenerator(ImageDatasetGenerator[T]): json.dumps({"version": "0.0", "date": create_time_id()}) ) - def _get_saved_images_and_labels(self, dataset: DirectoryROCODataset) -> Iterable[Tuple[Image, T]]: + def _get_images_paths_and_labels(self, dataset: DirectoryROCODataset) -> Iterable[Tuple[Image, T]]: return ( - (Image.from_path(image_path), self._label_from_filepath(image_path)) + (image_path, self._label_from_filepath(image_path)) for image_path in (dataset.dataset_path / self.task_name).glob("*.jpg") if self._valid_label(self._label_from_filepath(image_path)) ) -- GitLab