From 6b054e6deeede66e8554f90e3ed084fd780fcc49 Mon Sep 17 00:00:00 2001 From: Mathieu Beligon <mathieu@feedly.com> Date: Wed, 23 Sep 2020 17:15:16 +0200 Subject: [PATCH] [robots] (report) add some graphs --- .../red_blue_channels_comparison_model.py | 4 +- common/polystar/common/utils/dataframe.py | 16 ++- .../image_pipeline_evaluation_reporter.py | 125 +++++++++++++++--- .../evaluation/image_pipeline_evaluator.py | 11 +- 4 files changed, 122 insertions(+), 34 deletions(-) diff --git a/common/polystar/common/image_pipeline/models/red_blue_channels_comparison_model.py b/common/polystar/common/image_pipeline/models/red_blue_channels_comparison_model.py index 78cb00e..d7b2416 100644 --- a/common/polystar/common/image_pipeline/models/red_blue_channels_comparison_model.py +++ b/common/polystar/common/image_pipeline/models/red_blue_channels_comparison_model.py @@ -14,7 +14,7 @@ class RedBlueComparisonModel(AbsoluteClassifierModelABC): blue_channel_id: int = 2 def __post_init__(self): - self.labels_ = np.asarray(sorted(["Red", "Grey", "Blue"])) + self.labels_ = np.asarray(sorted(["red", "grey", "blue"])) self.label2index_ = {label: i for i, label in enumerate(self.labels_)} def fit(self, features: List[Any], labels: List[Any]) -> "RedBlueComparisonModel": @@ -22,7 +22,7 @@ class RedBlueComparisonModel(AbsoluteClassifierModelABC): def predict(self, features: List[Tuple[float, float, float]]) -> List[str]: return [ - "Red" if feature[self.red_channel_id] >= feature[self.blue_channel_id] else "Blue" for feature in features + "red" if feature[self.red_channel_id] >= feature[self.blue_channel_id] else "blue" for feature in features ] def __str__(self) -> str: diff --git a/common/polystar/common/utils/dataframe.py b/common/polystar/common/utils/dataframe.py index 6d25f92..814915c 100644 --- a/common/polystar/common/utils/dataframe.py +++ b/common/polystar/common/utils/dataframe.py @@ -1,27 +1,29 @@ -from typing import Any, Iterable, Callable, Union +from typing import Any, 
# A formatter is either a ``str.format`` template (e.g. ``"{:.1%}"``) or a
# one-argument callable that turns a cell value into its display form.
Format = Union[str, Callable]


def format_df_column(df: DataFrame, column_name: str, fmt: Format) -> None:
    """Format every value of one column of ``df`` in place.

    :param df: frame to modify; the column is replaced by its formatted values.
    :param column_name: label of the column to format.
    :param fmt: ``str.format`` template or one-argument callable.
    """
    # Go through make_formater so that callables are accepted as the ``Format``
    # type promises; calling ``fmt.format`` directly only works for strings.
    df[column_name] = df[column_name].map(make_formater(fmt))


def format_df_columns(df: DataFrame, column_names: Iterable[str], fmt: Format) -> None:
    """Apply :func:`format_df_column` with the same ``fmt`` to each listed column."""
    for c in column_names:
        format_df_column(df, c, fmt)


def format_df_row(df: DataFrame, loc: Any, fmt: Format) -> None:
    """Format every value of the row selected by ``df.loc[loc]`` in place.

    :param df: frame to modify.
    :param loc: index label passed to ``df.loc``.
    :param fmt: ``str.format`` template or one-argument callable.
    """
    # NOTE(review): the formatted values are written back into the existing
    # columns; with numeric columns this relies on pandas upcasting them.
    df.loc[loc] = df.loc[loc].map(make_formater(fmt))


def format_df_rows(df: DataFrame, locs: Iterable[Any], fmt: Format) -> None:
    """Apply :func:`format_df_row` with the same ``fmt`` to each listed row."""
    for loc in locs:
        format_df_row(df, loc, fmt)


def make_formater(fmt: Format) -> Callable:
    """Turn a ``Format`` into a one-argument callable.

    A string is treated as a ``str.format`` template and its bound ``format``
    method is returned; any other value is returned unchanged.
    """
    if isinstance(fmt, str):
        return fmt.format
    return fmt
import ImagePipeline -from polystar.common.utils.dataframe import format_df_column, format_df_row, format_df_rows +from polystar.common.utils.dataframe import Format, format_df_column, format_df_row, format_df_rows, make_formater from polystar.common.utils.markdown import MarkdownFile from polystar.common.utils.time import create_time_id from research.common.constants import DSET_DIR, EVALUATION_DIR @@ -30,15 +34,12 @@ class ImagePipelineEvaluationReporter(Generic[ValueT]): pipeline2results = self.evaluator.evaluate_pipelines(pipelines) - with MarkdownFile( - EVALUATION_DIR / self.evaluation_project / f"{evaluation_short_name}_{create_time_id()}.md" - ) as mf: - mf.title(f"Evaluation report {evaluation_short_name}") + report_dir = EVALUATION_DIR / self.evaluation_project / f"{evaluation_short_name}_{create_time_id()}" + with MarkdownFile(report_dir / "report.md") as mf: + mf.title(f"Evaluation report {evaluation_short_name}") self._report_datasets(mf) - - self._report_aggregated_results(mf, pipeline2results) - + self._report_aggregated_results(mf, pipeline2results, report_dir) self._report_pipelines_results(mf, pipeline2results) def _report_datasets(self, mf: MarkdownFile): @@ -70,11 +71,22 @@ class ImagePipelineEvaluationReporter(Generic[ValueT]): df["Repartition"] = (df["Total"] / total).map("{:.1%}".format) mf.table(df) - def _report_aggregated_results(self, mf: MarkdownFile, pipeline2results: Dict[str, ClassificationResults[ValueT]]): - aggregated_results = self._aggregate_results(pipeline2results) + def _report_aggregated_results( + self, mf: MarkdownFile, pipeline2results: Dict[str, ClassificationResults[ValueT]], report_dir: Path + ): + fig, (ax_test, ax_train) = plt.subplots(1, 2, figsize=(16, 5)) + aggregated_test_results = self._aggregate_results(pipeline2results, ax_test, "test") + aggregated_train_results = self._aggregate_results(pipeline2results, ax_train, "train") + fig.tight_layout() + aggregated_image_name = "aggregated_test_results.png" + 
def bar_plot_with_secondary(
    df: DataFrame,
    title: str,
    fmt_y1: Format = str,
    fmt_y2: Format = str,
    y1_log: bool = False,
    y2_log: bool = False,
    limits_y1: Optional[Tuple[float, float]] = None,
    limits_y2: Optional[Tuple[float, float]] = None,
    ax: Optional[Axes] = None,
):
    """Draw a grouped bar chart of a two-column frame, one y-axis per column.

    The first column is drawn against the left y-axis and the second against a
    secondary (right) y-axis. Each bar is annotated with its formatted value
    and a single legend covering both axes is added.

    :param df: frame with exactly two columns (unpacking fails otherwise).
    :param title: title of the plot.
    :param fmt_y1: formatter for the labels of the first column's bars.
    :param fmt_y2: formatter for the labels of the second column's bars.
    :param y1_log: use a log scale on the left axis.
    :param y2_log: use a log scale on the right axis.
    :param limits_y1: optional ``(bottom, top)`` limits of the left axis.
    :param limits_y2: optional ``(bottom, top)`` limits of the right axis.
    :param ax: axes to draw on; a new figure is created when omitted.
    """
    if ax is None:
        _, ax = plt.subplots()

    y1, y2 = df.columns

    df.plot.bar(rot=0, ax=ax, secondary_y=y2, legend=False, title=title)

    # Look the secondary axes up from ``ax`` itself rather than from pyplot's
    # "current figure", which is not necessarily the figure ``ax`` lives in.
    # pandas exposes the twin axes it creates as ``right_ax``; fall back to the
    # last axes of the owning figure if that attribute is missing.
    ax2 = getattr(ax, "right_ax", None)
    if ax2 is None:
        ax2 = ax.figure.axes[-1]

    _format_ax(ax, y1, fmt_y1, y1_log, limits_y1)
    _format_ax(ax2, y2, fmt_y2, y2_log, limits_y2)

    _legend_with_secondary(ax, ax2)


def _legend_with_secondary(ax1: Axes, ax2: Axes):
    """Attach to ``ax1`` one legend listing the handles of both axes."""
    lines_1, labels_1 = ax1.get_legend_handles_labels()
    lines_2, labels_2 = ax2.get_legend_handles_labels()
    ax1.legend(lines_1 + lines_2, labels_1 + labels_2, loc=0)


def _format_ax(ax: Axes, label: str, fmt: Format, log_scale: bool, limits: Optional[Tuple[float, float]]):
    """Label one y-axis, apply limits/scale and annotate each of its bars.

    :param ax: axes whose patches (bars) are annotated.
    :param label: y-axis label.
    :param fmt: formatter applied to each bar height to build its annotation.
    :param log_scale: switch the y-axis to a log scale.
    :param limits: optional ``(bottom, top)`` y-limits.
    """
    ax.set_ylabel(label)

    if limits:
        ax.set_ylim(*limits)

    if log_scale:
        ax.set_yscale("log")

    bottom, _ = ax.get_ylim()

    formatter = make_formater(fmt)

    for patch in ax.patches:
        height = patch.get_height()
        if not log_scale:
            label_y = 0.6 * height
        elif height > 0 and bottom > 0:
            # Visual middle of the bar on a log axis: geometric mean of the
            # bar height and the bottom of the axis.
            label_y = pow(10, 0.5 * (log(height, 10) + log(bottom, 10)))
        else:
            # A non-positive value has no logarithm; pin the label to the
            # bottom of the axis instead of raising a math domain error.
            label_y = bottom
        ax.annotate(
            formatter(height),
            (patch.get_x() + patch.get_width() / 2.0, label_y),
            # Zero offset: without it the xy data coordinates would be reused
            # as an offset expressed in points.
            xytext=(0, 0),
            ha="center",
            va="center",
            textcoords="offset points",
        )
def on_set(self, set_: str) -> "SetClassificationResults[ValueT]":
    """Return the results computed on the requested dataset split.

    :param set_: ``"train"`` selects ``self.train_results``; any other value
        selects ``self.test_results``.
    :return: the results stored for that split.
    """
    # Compare by value: ``is`` checks object identity, which only happens to
    # work for interned string literals and fails for equal strings built at
    # runtime.
    if set_ == "train":
        return self.train_results
    return self.test_results