From 01996ef7b29ac29817439081924cb65c7fe05495 Mon Sep 17 00:00:00 2001 From: Mathieu Beligon <mathieu@feedly.com> Date: Mon, 14 Dec 2020 20:29:37 +0100 Subject: [PATCH] [robots@robots] (evaluation) refactor the evaluation framework --- .../image_pipeline/preprocessors/normalise.py | 7 + .../image_pipeline/preprocessors/resize.py | 14 + common/polystar/common/models/image.py | 6 +- .../classification/classification_pipeline.py | 9 +- .../polystar/common/utils/iterable_utils.py | 21 +- common/polystar/common/utils/markdown.py | 9 +- .../research/common/datasets/image_dataset.py | 5 +- .../digits/.changes | Bin 31330 -> 75097 bytes .../armor_color/armor_color_benchmarker.py | 20 ++ .../armor_color_pipeline_reporter_factory.py | 24 -- .../{baseline_experiments.py => benchmark.py} | 14 +- .../armor_digit/armor_digit_benchmarker.py | 20 ++ .../armor_digit_pipeline_reporter_factory.py | 24 -- .../robots_at_robots/armor_digit/benchmark.py | 57 ++-- .../armor_digit/clean_datasets.py | 13 +- .../dataset/armor_value_dataset_generator.py | 8 +- .../robots_at_robots/demos/demo_pipeline.py | 2 +- .../robots_at_robots/evaluation/benchmark.py | 49 +++ .../image_pipeline_evaluation_reporter.py | 306 ++++++++++-------- .../evaluation/image_pipeline_evaluator.py | 130 +++----- .../evaluation/metrics/__init__.py | 0 .../evaluation/metrics/accuracy.py | 11 + .../robots_at_robots/evaluation/metrics/f1.py | 30 ++ .../evaluation/metrics/metric_abc.py | 17 + .../evaluation/performance.py | 79 +++++ .../robots_at_robots/evaluation/set.py | 14 + .../robots_at_robots/evaluation/trainer.py | 25 ++ 27 files changed, 590 insertions(+), 324 deletions(-) create mode 100644 common/polystar/common/image_pipeline/preprocessors/normalise.py create mode 100644 common/polystar/common/image_pipeline/preprocessors/resize.py create mode 100644 robots-at-robots/research/robots_at_robots/armor_color/armor_color_benchmarker.py delete mode 100644 robots-at-robots/research/robots_at_robots/armor_color/armor_color_pipeline_reporter_factory.py rename robots-at-robots/research/robots_at_robots/armor_color/{baseline_experiments.py => benchmark.py} (85%) create mode 100644 robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_benchmarker.py delete mode 100644 robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_pipeline_reporter_factory.py create mode 100644 robots-at-robots/research/robots_at_robots/evaluation/benchmark.py create mode 100644 robots-at-robots/research/robots_at_robots/evaluation/metrics/__init__.py create mode 100644 robots-at-robots/research/robots_at_robots/evaluation/metrics/accuracy.py create mode 100644 robots-at-robots/research/robots_at_robots/evaluation/metrics/f1.py create mode 100644 robots-at-robots/research/robots_at_robots/evaluation/metrics/metric_abc.py create mode 100644 robots-at-robots/research/robots_at_robots/evaluation/performance.py create mode 100644 robots-at-robots/research/robots_at_robots/evaluation/set.py create mode 100644 robots-at-robots/research/robots_at_robots/evaluation/trainer.py diff --git a/common/polystar/common/image_pipeline/preprocessors/normalise.py b/common/polystar/common/image_pipeline/preprocessors/normalise.py new file mode 100644 index 0000000..a00c8d0 --- /dev/null +++ b/common/polystar/common/image_pipeline/preprocessors/normalise.py @@ -0,0 +1,7 @@ +from polystar.common.models.image import Image +from polystar.common.pipeline.pipe_abc import PipeABC + + +class Normalise(PipeABC): + def transform_single(self, image: Image) -> Image: + return image 
/ 255 diff --git a/common/polystar/common/image_pipeline/preprocessors/resize.py b/common/polystar/common/image_pipeline/preprocessors/resize.py new file mode 100644 index 0000000..6afbc2b --- /dev/null +++ b/common/polystar/common/image_pipeline/preprocessors/resize.py @@ -0,0 +1,14 @@ +from typing import Tuple + +from cv2.cv2 import resize + +from polystar.common.models.image import Image +from polystar.common.pipeline.pipe_abc import PipeABC + + +class Resize(PipeABC): + def __init__(self, size: Tuple[int, int]): + self.size = size + + def transform_single(self, image: Image) -> Image: + return resize(image, self.size) diff --git a/common/polystar/common/models/image.py b/common/polystar/common/models/image.py index 4d598f5..29a0b13 100644 --- a/common/polystar/common/models/image.py +++ b/common/polystar/common/models/image.py @@ -1,6 +1,6 @@ from dataclasses import dataclass, field from pathlib import Path -from typing import Iterable +from typing import Iterable, List import cv2 import numpy as np @@ -38,3 +38,7 @@ def load_images_in_directory( def save_image(image: Image, image_path: Path, conversion: int = cv2.COLOR_RGB2BGR): image_path.parent.mkdir(exist_ok=True, parents=True) cv2.imwrite(str(image_path), cv2.cvtColor(image, conversion)) + + +def file_images_to_images(file_images: Iterable[FileImage]) -> List[Image]: + return [np.asarray(file_image) for file_image in file_images] diff --git a/common/polystar/common/pipeline/classification/classification_pipeline.py b/common/polystar/common/pipeline/classification/classification_pipeline.py index 074cc15..99c85f5 100644 --- a/common/polystar/common/pipeline/classification/classification_pipeline.py +++ b/common/polystar/common/pipeline/classification/classification_pipeline.py @@ -2,7 +2,7 @@ from abc import ABC from enum import IntEnum from typing import ClassVar, Generic, List, Sequence, Tuple, TypeVar -from numpy import asarray, ndarray +from numpy import asarray, ndarray, pad from polystar.common.pipeline.classification.classifier_abc import ClassifierABC from polystar.common.pipeline.pipe_abc import IT, PipeABC @@ -29,6 +29,13 @@ class ClassificationPipeline(Pipeline, Generic[IT, EnumT], ABC): def predict(self, x: Sequence[IT]) -> List[EnumT]: return self.predict_proba_and_classes(x)[1] + def predict_proba(self, x: Sequence[IT]) -> ndarray: + proba = super().predict_proba(x) + missing_classes = self.classifier.n_classes - proba.shape[1] + if not missing_classes: + return proba + return pad(proba, ((0, 0), (0, missing_classes))) + def predict_proba_and_classes(self, x: Sequence[IT]) -> Tuple[ndarray, List[EnumT]]: proba = asarray(self.predict_proba(x)) indices = proba.argmax(axis=1) diff --git a/common/polystar/common/utils/iterable_utils.py b/common/polystar/common/utils/iterable_utils.py index a004688..01bc2da 100644 --- a/common/polystar/common/utils/iterable_utils.py +++ b/common/polystar/common/utils/iterable_utils.py @@ -1,4 +1,6 @@ -from typing import Iterable +from collections import defaultdict +from itertools import chain +from typing import Callable, Dict, Iterable, List, TypeVar from more_itertools import ilen @@ -8,3 +10,20 @@ def smart_len(it: Iterable) -> int: return len(it) except AttributeError: return ilen(it) + + +T = TypeVar("T") + + +def flatten(it: Iterable[Iterable[T]]) -> List[T]: + return list(chain.from_iterable(it)) + + +U = TypeVar("U") + + +def group_by(it: Iterable[T], key: Callable[[T], U]) -> Dict[U, List[T]]: + rv = defaultdict(list) + for item in it: + rv[key(item)].append(item) + return rv 
diff --git a/common/polystar/common/utils/markdown.py b/common/polystar/common/utils/markdown.py
index 79a9d83..3997375 100644
--- a/common/polystar/common/utils/markdown.py
+++ b/common/polystar/common/utils/markdown.py
@@ -1,6 +1,7 @@
 from pathlib import Path
-from typing import TextIO, Iterable, Any
+from typing import Any, Iterable, TextIO

+from matplotlib.figure import Figure
 from pandas import DataFrame
 from tabulate import tabulate

@@ -35,7 +36,11 @@ class MarkdownFile:
         self.paragraph(f"")
         return self

+    def figure(self, figure: Figure, name: str, alt: str = "img"):
+        figure.savefig(self.markdown_path.parent / name)
+        return self.image(name, alt)
+
     def table(self, data: DataFrame) -> "MarkdownFile":
-        self.file.write(tabulate(data, tablefmt="pipe", headers="keys"))
+        self.file.write(tabulate(data, tablefmt="pipe", headers="keys").replace(".0 ", " "))
         self.file.write("\n\n")
         return self
diff --git a/common/research/common/datasets/image_dataset.py b/common/research/common/datasets/image_dataset.py
index 13bb5a5..9378439 100644
--- a/common/research/common/datasets/image_dataset.py
+++ b/common/research/common/datasets/image_dataset.py
@@ -1,6 +1,6 @@
 from pathlib import Path

-from polystar.common.models.image import Image
+from polystar.common.models.image import FileImage, Image
 from research.common.datasets.dataset import Dataset
 from research.common.datasets.lazy_dataset import LazyDataset, TargetT

@@ -9,3 +9,6 @@ FileDataset = Dataset[Path, TargetT]

 LazyImageDataset = LazyDataset[Image, TargetT]
 ImageDataset = Dataset[Image, TargetT]
+
+LazyFileImageDataset = LazyDataset[FileImage, TargetT]
+FileImageDataset = Dataset[FileImage, TargetT]
diff --git a/dataset/dji_roco/robomaster_Final Tournament/digits/.changes b/dataset/dji_roco/robomaster_Final Tournament/digits/.changes
index 35d75bb3ff7e3bcf5ffc930deef3313e092ba0fe..094f578582ac0df9123e392b0be3aec31d5a695b 100644
Binary files a/dataset/dji_roco/robomaster_Final Tournament/digits/.changes and b/dataset/dji_roco/robomaster_Final Tournament/digits/.changes differ
diff --git a/robots-at-robots/research/robots_at_robots/armor_color/armor_color_benchmarker.py
b/robots-at-robots/research/robots_at_robots/armor_color/armor_color_benchmarker.py new file mode 100644 index 0000000..a01bf0d --- /dev/null +++ b/robots-at-robots/research/robots_at_robots/armor_color/armor_color_benchmarker.py @@ -0,0 +1,20 @@ +from typing import List + +from polystar.common.models.object import ArmorColor +from research.common.datasets.roco.roco_dataset_builder import ROCODatasetBuilder +from research.robots_at_robots.armor_color.armor_color_dataset import make_armor_color_dataset_generator +from research.robots_at_robots.evaluation.benchmark import make_armor_value_benchmarker + + +def make_armor_color_benchmarker( + train_roco_datasets: List[ROCODatasetBuilder], test_roco_datasets: List[ROCODatasetBuilder], experiment_name: str +): + dataset_generator = make_armor_color_dataset_generator() + return make_armor_value_benchmarker( + train_roco_datasets=train_roco_datasets, + test_roco_datasets=test_roco_datasets, + evaluation_project="armor-color", + experiment_name=experiment_name, + classes=list(ArmorColor), + dataset_generator=dataset_generator, + ) diff --git a/robots-at-robots/research/robots_at_robots/armor_color/armor_color_pipeline_reporter_factory.py b/robots-at-robots/research/robots_at_robots/armor_color/armor_color_pipeline_reporter_factory.py deleted file mode 100644 index a24ad1a..0000000 --- a/robots-at-robots/research/robots_at_robots/armor_color/armor_color_pipeline_reporter_factory.py +++ /dev/null @@ -1,24 +0,0 @@ -from typing import List - -from research.common.datasets.roco.roco_dataset_builder import ROCODatasetBuilder -from research.robots_at_robots.armor_color.armor_color_dataset import make_armor_color_dataset_generator -from research.robots_at_robots.evaluation.image_pipeline_evaluation_reporter import ImagePipelineEvaluationReporter -from research.robots_at_robots.evaluation.image_pipeline_evaluator import ImagePipelineEvaluator - - -class ArmorColorPipelineReporterFactory: - @staticmethod - def from_roco_datasets( - train_roco_datasets: List[ROCODatasetBuilder], - test_roco_datasets: List[ROCODatasetBuilder], - experiment_name: str, - ): - return ImagePipelineEvaluationReporter( - evaluator=ImagePipelineEvaluator( - train_roco_datasets=train_roco_datasets, - test_roco_datasets=test_roco_datasets, - image_dataset_generator=make_armor_color_dataset_generator(), - ), - evaluation_project="armor-color", - experiment_name=experiment_name, - ) diff --git a/robots-at-robots/research/robots_at_robots/armor_color/baseline_experiments.py b/robots-at-robots/research/robots_at_robots/armor_color/benchmark.py similarity index 85% rename from robots-at-robots/research/robots_at_robots/armor_color/baseline_experiments.py rename to robots-at-robots/research/robots_at_robots/armor_color/benchmark.py index 703fefe..1ac6f2b 100644 --- a/robots-at-robots/research/robots_at_robots/armor_color/baseline_experiments.py +++ b/robots-at-robots/research/robots_at_robots/armor_color/benchmark.py @@ -13,9 +13,7 @@ from polystar.common.pipeline.classification.random_model import RandomClassifie from polystar.common.pipeline.classification.rule_based_classifier import RuleBasedClassifierABC from polystar.common.pipeline.pipe_abc import PipeABC from research.common.datasets.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo -from research.robots_at_robots.armor_color.armor_color_pipeline_reporter_factory import ( - ArmorColorPipelineReporterFactory, -) +from research.robots_at_robots.armor_color.armor_color_benchmarker import make_armor_color_benchmarker class 
ArmorColorPipeline(ClassificationPipeline): @@ -38,20 +36,20 @@ class RedBlueComparisonClassifier(RuleBasedClassifierABC): if __name__ == "__main__": logging.getLogger().setLevel("INFO") - reporter = ArmorColorPipelineReporterFactory.from_roco_datasets( - train_roco_datasets=[ + _benchmarker = make_armor_color_benchmarker( + [ ROCODatasetsZoo.TWITCH.T470150052, ROCODatasetsZoo.TWITCH.T470152289, ROCODatasetsZoo.TWITCH.T470149568, ROCODatasetsZoo.TWITCH.T470151286, ], - test_roco_datasets=[ + [ ROCODatasetsZoo.TWITCH.T470152838, ROCODatasetsZoo.TWITCH.T470153081, ROCODatasetsZoo.TWITCH.T470158483, ROCODatasetsZoo.TWITCH.T470152730, ], - experiment_name="test", + "test", ) red_blue_comparison_pipeline = ArmorColorPipeline.from_pipes( @@ -62,4 +60,4 @@ if __name__ == "__main__": [RGB2HSV(), Histogram2D(), LogisticRegression()], name="hsv-hist-lr", ) - reporter.report([random_pipeline, red_blue_comparison_pipeline, hsv_hist_lr_pipeline]) + _benchmarker.benchmark([random_pipeline, red_blue_comparison_pipeline, hsv_hist_lr_pipeline]) diff --git a/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_benchmarker.py b/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_benchmarker.py new file mode 100644 index 0000000..f4792c4 --- /dev/null +++ b/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_benchmarker.py @@ -0,0 +1,20 @@ +from typing import List + +from polystar.common.models.object import ArmorDigit +from research.common.datasets.roco.roco_dataset_builder import ROCODatasetBuilder +from research.robots_at_robots.armor_digit.armor_digit_dataset import make_armor_digit_dataset_generator +from research.robots_at_robots.evaluation.benchmark import make_armor_value_benchmarker + + +def make_armor_digit_benchmarker( + train_roco_datasets: List[ROCODatasetBuilder], test_roco_datasets: List[ROCODatasetBuilder], experiment_name: str +): + dataset_generator = make_armor_digit_dataset_generator() + return make_armor_value_benchmarker( + train_roco_datasets=train_roco_datasets, + test_roco_datasets=test_roco_datasets, + evaluation_project="armor-digit", + experiment_name=experiment_name, + classes=list(ArmorDigit), + dataset_generator=dataset_generator, + ) diff --git a/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_pipeline_reporter_factory.py b/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_pipeline_reporter_factory.py deleted file mode 100644 index 6c5f9a0..0000000 --- a/robots-at-robots/research/robots_at_robots/armor_digit/armor_digit_pipeline_reporter_factory.py +++ /dev/null @@ -1,24 +0,0 @@ -from typing import List - -from research.common.datasets.roco.roco_dataset_builder import ROCODatasetBuilder -from research.robots_at_robots.armor_digit.armor_digit_dataset import make_armor_digit_dataset_generator -from research.robots_at_robots.evaluation.image_pipeline_evaluation_reporter import ImagePipelineEvaluationReporter -from research.robots_at_robots.evaluation.image_pipeline_evaluator import ImagePipelineEvaluator - - -class ArmorDigitPipelineReporterFactory: - @staticmethod - def from_roco_datasets( - train_roco_datasets: List[ROCODatasetBuilder], - test_roco_datasets: List[ROCODatasetBuilder], - experiment_name: str, - ): - return ImagePipelineEvaluationReporter( - evaluator=ImagePipelineEvaluator( - train_roco_datasets=train_roco_datasets, - test_roco_datasets=test_roco_datasets, - image_dataset_generator=make_armor_digit_dataset_generator(), - ), - evaluation_project="armor-digit", - 
experiment_name=experiment_name, - ) diff --git a/robots-at-robots/research/robots_at_robots/armor_digit/benchmark.py b/robots-at-robots/research/robots_at_robots/armor_digit/benchmark.py index 757247e..1b48d0e 100644 --- a/robots-at-robots/research/robots_at_robots/armor_digit/benchmark.py +++ b/robots-at-robots/research/robots_at_robots/armor_digit/benchmark.py @@ -3,8 +3,6 @@ import warnings from pathlib import Path from typing import List, Sequence, Tuple -import seaborn as sns -from cv2.cv2 import resize from keras_preprocessing.image import ImageDataGenerator from numpy import asarray from tensorflow_core.python.keras import Input, Model, Sequential @@ -15,16 +13,15 @@ from tensorflow_core.python.keras.optimizer_v2.adam import Adam from tensorflow_core.python.keras.optimizer_v2.gradient_descent import SGD from tensorflow_core.python.keras.utils.np_utils import to_categorical +from polystar.common.image_pipeline.preprocessors.normalise import Normalise +from polystar.common.image_pipeline.preprocessors.resize import Resize from polystar.common.models.image import Image from polystar.common.models.object import ArmorDigit from polystar.common.pipeline.classification.classification_pipeline import ClassificationPipeline from polystar.common.pipeline.classification.classifier_abc import ClassifierABC from polystar.common.pipeline.classification.random_model import RandomClassifier -from polystar.common.pipeline.pipe_abc import PipeABC from research.common.datasets.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo -from research.robots_at_robots.armor_digit.armor_digit_pipeline_reporter_factory import ( - ArmorDigitPipelineReporterFactory, -) +from research.robots_at_robots.armor_digit.armor_digit_benchmarker import make_armor_digit_benchmarker class ArmorDigitPipeline(ClassificationPipeline): @@ -45,14 +42,14 @@ class KerasClassifier(ClassifierABC): return ImageDataGenerator(rotation_range=45, zoom_range=[0.8, 1]) # brightness_range=[0.7, 1.4] def fit(self, images: List[Image], labels: List[int]) -> "KerasClassifier": - n_val: int = 540 # FIXME + n_val: int = 371 # FIXME images = asarray(images) labels = to_categorical(asarray(labels), 5) # FIXME train_images, train_labels = images[:-n_val], labels[:-n_val] val_images, val_labels = images[-n_val:], labels[-n_val:] batch_size = 32 # FIXME - train_generator = self.train_data_gen.flow(train_images, train_labels, batch_size) + train_generator = self.train_data_gen.flow(train_images, train_labels, batch_size=batch_size, shuffle=True) self.model.fit( x=train_generator, @@ -100,19 +97,6 @@ class CNN(KerasClassifier): ) -class Resize(PipeABC): - def __init__(self, size: Tuple[int, int]): - self.size = size - - def transform_single(self, image: Image) -> Image: - return resize(image, self.size) - - -class Normalise(PipeABC): - def transform_single(self, image: Image) -> Image: - return image / 255 - - def make_digits_cnn_pipeline( input_size: int, conv_blocks: Sequence[Sequence[int]], report_dir: Path, with_data_augmentation: bool, lr: float ) -> ArmorDigitPipeline: @@ -186,9 +170,7 @@ if __name__ == "__main__": logging.getLogger("tensorflow").setLevel("ERROR") warnings.filterwarnings("ignore") - sns.set_style() - - reporter = ArmorDigitPipelineReporterFactory.from_roco_datasets( + _benchmarker = make_armor_digit_benchmarker( train_roco_datasets=[ # ROCODatasetsZoo.DJI.CENTRAL_CHINA, # ROCODatasetsZoo.DJI.FINAL, @@ -200,32 +182,39 @@ if __name__ == "__main__": ROCODatasetsZoo.TWITCH.T470152289, ], test_roco_datasets=[ - # 
ROCODatasetsZoo.TWITCH.T470152838, ROCODatasetsZoo.TWITCH.T470153081, ROCODatasetsZoo.TWITCH.T470158483, ROCODatasetsZoo.TWITCH.T470152730, ], - experiment_name="data_augm", + experiment_name="test-benchmarker", ) random_pipeline = ArmorDigitPipeline.from_pipes([RandomClassifier()], name="random") + report_dir = _benchmarker.reporter.report_dir cnn_pipelines = [ - make_digits_cnn_pipeline(32, ((32, 32), (64, 64)), reporter.report_dir, with_data_augmentation=True, lr=lr) - for lr in (1e-2, 5e-3, 2e-3, 1e-3, 5e-4, 2e-4) - ] + [ make_digits_cnn_pipeline( - 64, ((32,), (64, 64), (64, 64)), reporter.report_dir, with_data_augmentation=False, lr=lr + 32, ((32, 32), (64, 64)), report_dir, with_data_augmentation=with_data_augmentation, lr=lr, ) - for lr in (5e-2, 2e-2, 1e-2, 5e-3, 2e-3, 1e-3) + for with_data_augmentation in [False] + for lr in [2.5e-2, 1.6e-2, 1e-2, 6.3e-3, 4e-4] ] + # cnn_pipelines = [ + # make_digits_cnn_pipeline( + # 64, ((32,), (64, 64), (64, 64)), reporter.report_dir, with_data_augmentation=True, lr=lr + # ) + # for with_data_augmentation in [True, False] + # for lr in (5.6e-2, 3.1e-2, 1.8e-2, 1e-2, 5.6e-3, 3.1e-3, 1.8e-3, 1e-3) + # ] vgg16_pipelines = [ - make_vgg16_pipeline(reporter.report_dir, input_size=32, with_data_augmentation=True, lr=lr) + make_vgg16_pipeline(report_dir, input_size=32, with_data_augmentation=False, lr=lr) for lr in (1e-5, 5e-4, 2e-4, 1e-4, 5e-3) ] - logging.info(f"Run `tensorboard --logdir={reporter.report_dir}` for realtime logs") + logging.info(f"Run `tensorboard --logdir={report_dir}` for realtime logs") - reporter.report([random_pipeline, *cnn_pipelines, *vgg16_pipelines]) + _benchmarker.benchmark( + [random_pipeline,] + ) diff --git a/robots-at-robots/research/robots_at_robots/armor_digit/clean_datasets.py b/robots-at-robots/research/robots_at_robots/armor_digit/clean_datasets.py index 943d412..394a1c4 100644 --- a/robots-at-robots/research/robots_at_robots/armor_digit/clean_datasets.py +++ b/robots-at-robots/research/robots_at_robots/armor_digit/clean_datasets.py @@ -18,7 +18,18 @@ if __name__ == "__main__": _armor_digit_dataset = ( make_armor_digit_dataset_generator() .from_roco_dataset(_roco_dataset) - .skip((1009 - 117) + (1000 - 86) + (1000 - 121) + (1000 - 138) + (1000 - 137)) + .skip( + (1009 - 117) + + (1000 - 86) + + (1000 - 121) + + (1000 - 138) + + (1000 - 137) + + (1000 - 154) + + (1000 - 180) + + (1000 - 160) + + (1000 - 193) + + (1000 - 80) + ) .cap(1000) ) diff --git a/robots-at-robots/research/robots_at_robots/dataset/armor_value_dataset_generator.py b/robots-at-robots/research/robots_at_robots/dataset/armor_value_dataset_generator.py index 7b4ce98..4aafd34 100644 --- a/robots-at-robots/research/robots_at_robots/dataset/armor_value_dataset_generator.py +++ b/robots-at-robots/research/robots_at_robots/dataset/armor_value_dataset_generator.py @@ -5,6 +5,7 @@ from polystar.common.filters.exclude_filter import ExcludeFilter from polystar.common.filters.filter_abc import FilterABC from polystar.common.filters.pass_through_filter import PassThroughFilter from research.common.dataset.cleaning.dataset_changes import DatasetChanges +from research.common.datasets.image_dataset import FileImageDataset from research.common.datasets.image_file_dataset_builder import DirectoryDatasetBuilder from research.common.datasets.lazy_dataset import TargetT from research.common.datasets.roco.roco_dataset_builder import ROCODatasetBuilder @@ -28,12 +29,13 @@ class ArmorValueDatasetGenerator(Generic[TargetT]): self.task_name = task_name self.targets_filter 
= targets_filter or PassThroughFilter() - def from_roco_datasets(self, roco_datasets: List[ROCODatasetBuilder]) -> List[DirectoryDatasetBuilder[TargetT]]: - return [self.from_roco_dataset(roco_dataset) for roco_dataset in roco_datasets] + # FIXME signature inconsistency across methods + def from_roco_datasets(self, roco_datasets: List[ROCODatasetBuilder]) -> List[FileImageDataset[TargetT]]: + return [self.from_roco_dataset(roco_dataset).to_file_images().build() for roco_dataset in roco_datasets] def from_roco_dataset(self, roco_dataset_builder: ROCODatasetBuilder) -> DirectoryDatasetBuilder[TargetT]: cache_dir = roco_dataset_builder.main_dir / self.task_name - dataset_name = f"{roco_dataset_builder.name}_armor_{self.task_name}" + dataset_name = roco_dataset_builder.name ArmorValueDatasetCache(roco_dataset_builder, cache_dir, dataset_name, self.target_factory).generate_if_needed() diff --git a/robots-at-robots/research/robots_at_robots/demos/demo_pipeline.py b/robots-at-robots/research/robots_at_robots/demos/demo_pipeline.py index 0212bf8..c3a4d34 100644 --- a/robots-at-robots/research/robots_at_robots/demos/demo_pipeline.py +++ b/robots-at-robots/research/robots_at_robots/demos/demo_pipeline.py @@ -16,7 +16,7 @@ from polystar.common.utils.tensorflow import patch_tf_v2 from polystar.common.view.plt_results_viewer import PltResultViewer from polystar.robots_at_robots.dependency_injection import make_injector from research.common.datasets.roco.zoo.roco_dataset_zoo import ROCODatasetsZoo -from research.robots_at_robots.armor_color.baseline_experiments import ( +from research.robots_at_robots.armor_color.benchmark import ( ArmorColorPipeline, MeanChannels, RedBlueComparisonClassifier, diff --git a/robots-at-robots/research/robots_at_robots/evaluation/benchmark.py b/robots-at-robots/research/robots_at_robots/evaluation/benchmark.py new file mode 100644 index 0000000..045b13d --- /dev/null +++ b/robots-at-robots/research/robots_at_robots/evaluation/benchmark.py @@ -0,0 +1,49 @@ +from dataclasses import dataclass +from typing import List + +from polystar.common.pipeline.classification.classification_pipeline import ClassificationPipeline +from research.common.datasets.image_dataset import FileImageDataset +from research.common.datasets.roco.roco_dataset_builder import ROCODatasetBuilder +from research.robots_at_robots.dataset.armor_value_dataset_generator import ArmorValueDatasetGenerator +from research.robots_at_robots.evaluation.image_pipeline_evaluation_reporter import ImagePipelineEvaluationReporter +from research.robots_at_robots.evaluation.image_pipeline_evaluator import ImageClassificationPipelineEvaluator +from research.robots_at_robots.evaluation.metrics.f1 import F1Metric +from research.robots_at_robots.evaluation.trainer import ImageClassificationPipelineTrainer + + +@dataclass +class Benchmarker: + def __init__( + self, + train_datasets: List[FileImageDataset], + test_datasets: List[FileImageDataset], + evaluation_project: str, + experiment_name: str, + classes: List, + ): + self.trainer = ImageClassificationPipelineTrainer(train_datasets) + self.evaluator = ImageClassificationPipelineEvaluator(train_datasets, test_datasets) + self.reporter = ImagePipelineEvaluationReporter( + evaluation_project, experiment_name, classes, other_metrics=[F1Metric()] + ) + + def benchmark(self, pipelines: List[ClassificationPipeline]): + self.trainer.train_pipelines(pipelines) + self.reporter.report(self.evaluator.evaluate_pipelines(pipelines)) + + +def make_armor_value_benchmarker( + 
train_roco_datasets: List[ROCODatasetBuilder], + test_roco_datasets: List[ROCODatasetBuilder], + evaluation_project: str, + experiment_name: str, + dataset_generator: ArmorValueDatasetGenerator, + classes: List, +): + return Benchmarker( + dataset_generator.from_roco_datasets(train_roco_datasets), + dataset_generator.from_roco_datasets(test_roco_datasets), + evaluation_project=evaluation_project, + experiment_name=experiment_name, + classes=classes, + ) diff --git a/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluation_reporter.py b/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluation_reporter.py index 6cd66e5..72996a9 100644 --- a/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluation_reporter.py +++ b/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluation_reporter.py @@ -1,10 +1,8 @@ from collections import Counter -from dataclasses import dataclass, field -from enum import Enum +from dataclasses import InitVar, dataclass, field from math import log from os.path import relpath -from pathlib import Path -from typing import Dict, Generic, Iterable, List, Optional, Tuple +from typing import Generic, List, Optional, Tuple import matplotlib.pyplot as plt import numpy as np @@ -12,189 +10,233 @@ import seaborn as sns from matplotlib.axes import Axes, logging from matplotlib.figure import Figure from pandas import DataFrame +from sklearn.metrics import classification_report, confusion_matrix from polystar.common.pipeline.classification.classification_pipeline import EnumT -from polystar.common.pipeline.pipeline import Pipeline from polystar.common.utils.dataframe import Format, format_df_row, format_df_rows, make_formater from polystar.common.utils.markdown import MarkdownFile from polystar.common.utils.time import create_time_id from research.common.constants import DSET_DIR, EVALUATION_DIR -from research.common.datasets.roco.roco_dataset_builder import ROCODatasetBuilder -from research.robots_at_robots.evaluation.image_pipeline_evaluator import ( - ClassificationResults, - ImagePipelineEvaluator, - SetClassificationResults, -) - - -class Metric(Enum): - F1_WEIGHTED_AVG = ("f1-score", "weighted avg") - ACCURACY = ("precision", "accuracy") - - def __str__(self): - if self.value[1] == "accuracy": - return "accuracy" - return " ".join(self.value) - - def __getitem__(self, item): - return self.value[item] +from research.robots_at_robots.evaluation.metrics.accuracy import AccuracyMetric +from research.robots_at_robots.evaluation.metrics.metric_abc import MetricABC +from research.robots_at_robots.evaluation.performance import ClassificationPerformance, ClassificationPerformances +from research.robots_at_robots.evaluation.set import Set @dataclass class ImagePipelineEvaluationReporter(Generic[EnumT]): - evaluator: ImagePipelineEvaluator[EnumT] evaluation_project: str experiment_name: str - main_metric: Metric = Metric.F1_WEIGHTED_AVG - other_metrics: List[Metric] = field(default_factory=lambda: [Metric.ACCURACY]) + classes: List[EnumT] + main_metric: MetricABC = field(default_factory=AccuracyMetric) + other_metrics: InitVar[List[MetricABC]] = None + _mf: MarkdownFile = field(init=False) + _performances: ClassificationPerformances = field(init=False) - def __post_init__(self): + def __post_init__(self, other_metrics: List[MetricABC]): self.report_dir = EVALUATION_DIR / self.evaluation_project / f"{create_time_id()}_{self.experiment_name}" + self.all_metrics: List[MetricABC] = [self.main_metric] + 
(other_metrics or []) - def report(self, pipelines: Iterable[Pipeline]): - logging.info(f"Running experiment {self.experiment_name}") - - pipeline2results = self.evaluator.evaluate_pipelines(pipelines) + def report(self, performances: ClassificationPerformances): + sns.set() + self._performances = performances + with MarkdownFile(self.report_dir / "report.md") as self._mf: - with MarkdownFile(self.report_dir / "report.md") as mf: - mf.title(f"Evaluation report") - self._report_datasets(mf) - self._report_aggregated_results(mf, pipeline2results, self.report_dir) - self._report_pipelines_results(mf, pipeline2results) + self._mf.title(f"Evaluation report") + self._report_datasets() + self._report_aggregated_results() + self._report_pipelines_results() logging.info(f"Report generated at file:///{self.report_dir/'report.md'}") - def _report_datasets(self, mf: MarkdownFile): - mf.title("Datasets", level=2) + def _report_datasets(self): + self._mf.title("Datasets", level=2) - mf.title("Training", level=3) - self._report_dataset( - mf, self.evaluator.train_roco_datasets, self.evaluator.train_dataset_sizes, self.evaluator.train_labels - ) + self._mf.title("Training", level=3) + self._report_dataset(self._performances.train) - mf.title("Testing", level=3) - self._report_dataset( - mf, self.evaluator.test_roco_datasets, self.evaluator.test_dataset_sizes, self.evaluator.test_labels - ) + self._mf.title("Testing", level=3) + self._report_dataset(self._performances.test) - @staticmethod - def _report_dataset( - mf: MarkdownFile, roco_datasets: List[ROCODatasetBuilder], dataset_sizes: List[int], labels: List[EnumT] - ): - total = len(labels) - labels = [str(label) for label in labels] - mf.paragraph(f"{total} images") + def _report_dataset(self, performances: ClassificationPerformances): df = ( - DataFrame( - { - dataset.name: Counter(labels[start:end]) - for dataset, start, end in zip( - roco_datasets, np.cumsum([0] + dataset_sizes), np.cumsum(dataset_sizes) - ) - } - ) + DataFrame({perf.dataset_name: Counter(perf.labels) for perf in performances}) .fillna(0) .sort_index() + .astype(int) ) - df["Total"] = sum([df[d.name] for d in roco_datasets]) - df["Repartition"] = (df["Total"] / total).map("{:.1%}".format) - mf.table(df) - - def _report_aggregated_results( - self, mf: MarkdownFile, pipeline2results: Dict[str, ClassificationResults[EnumT]], report_dir: Path - ): - fig_scores, fig_times, aggregated_results = self._aggregate_results(pipeline2results) - aggregated_scores_image_name = "aggregated_scores.png" - fig_scores.savefig(report_dir / aggregated_scores_image_name) - aggregated_times_image_name = "aggregated_times.png" - fig_times.savefig(report_dir / aggregated_times_image_name) - - mf.title("Aggregated results", level=2) - mf.image(aggregated_scores_image_name) - mf.image(aggregated_times_image_name) - mf.paragraph("On test set:") - mf.table(aggregated_results[aggregated_results["set"] == "test"].drop(columns="set")) - mf.paragraph("On train set:") - mf.table(aggregated_results[aggregated_results["set"] == "train"].drop(columns="set")) - - def _report_pipelines_results(self, mf: MarkdownFile, pipeline2results: Dict[str, ClassificationResults[EnumT]]): - for pipeline_name, results in sorted( - pipeline2results.items(), - key=lambda name_results: name_results[1].test_results.report[self.main_metric[1]][self.main_metric[0]], + df["Total"] = df.sum(axis=1) + df["Repartition"] = df["Total"] / df["Total"].sum() + df.loc["Total"] = df.sum() + df.loc["Repartition"] = df.loc["Total"] / df["Total"]["Total"] 
+ dset_repartition = df.loc["Repartition"].map("{:.1%}".format) + df["Repartition"] = df["Repartition"].map("{:.1%}".format) + df.loc["Repartition"] = dset_repartition + df.at["Total", "Repartition"] = "" + df.at["Repartition", "Repartition"] = "" + df.at["Repartition", "Total"] = "" + self._mf.table(df) + + def _report_aggregated_results(self): + fig_scores, fig_times = self._make_aggregate_figures() + + self._mf.title("Aggregated results", level=2) + self._mf.figure(fig_scores, "aggregated_scores.png") + self._mf.figure(fig_times, "aggregated_times.png") + + self._mf.paragraph("On test set:") + self._mf.table(self._make_aggregated_results_for_set(Set.TRAIN)) + self._mf.paragraph("On train set:") + self._mf.table(self._make_aggregated_results_for_set(Set.TEST)) + + def _report_pipelines_results(self): + for pipeline_name, performances in sorted( + self._performances.group_by_pipeline().items(), + key=lambda name_perfs: self.main_metric(name_perfs[1].test.merge()), reverse=True, ): - self._report_pipeline_results(mf, pipeline_name, results) + self._report_pipeline_results(pipeline_name, performances) - def _report_pipeline_results(self, mf: MarkdownFile, pipeline_name: str, results: ClassificationResults[EnumT]): - mf.title(pipeline_name, level=2) + def _report_pipeline_results(self, pipeline_name: str, performances: ClassificationPerformances): + self._mf.title(pipeline_name, level=2) - mf.paragraph(results.full_pipeline_name) + self._mf.title("Train results", level=3) + self._report_pipeline_set_results(performances, Set.TRAIN) - mf.title("Train results", level=3) - ImagePipelineEvaluationReporter._report_pipeline_set_results( - mf, results.train_results, self.evaluator.train_images_paths - ) + self._mf.title("Test results", level=3) + self._report_pipeline_set_results(performances, Set.TEST) - mf.title("Test results", level=3) - ImagePipelineEvaluationReporter._report_pipeline_set_results( - mf, results.test_results, self.evaluator.test_images_paths - ) + def _report_pipeline_set_results(self, performances: ClassificationPerformances, set_: Set): + performances = performances.on_set(set_) + perf = performances.merge() + + self._mf.title("Metrics", level=4) + self._report_pipeline_set_metrics(performances, perf, set_) - @staticmethod - def _report_pipeline_set_results( - mf: MarkdownFile, results: SetClassificationResults[EnumT], image_paths: List[Path] + self._mf.title("Confusion Matrix:", level=4) + self._report_pipeline_set_confusion_matrix(perf) + + self._mf.title("25 Mistakes examples", level=4) + self._report_pipeline_set_mistakes(perf) + + def _report_pipeline_set_metrics( + self, performances: ClassificationPerformances, perf: ClassificationPerformance, set_: Set ): - mf.title("Metrics", level=4) - mf.paragraph(f"Inference time: {results.mean_inference_time: .2e} s/img") - df = DataFrame(results.report) + fig: Figure = plt.figure(figsize=(9, 6)) + ax: Axes = fig.subplots() + sns.barplot( + data=DataFrame( + [ + {"dataset": performance.dataset_name, "score": metric(performance), "metric": metric.name} + for performance in performances + for metric in self.all_metrics + ] + + [ + {"dataset": performance.dataset_name, "score": len(performance) / len(perf), "metric": "support"} + for performance in performances + ] + ), + x="dataset", + hue="metric", + y="score", + ax=ax, + ) + ax.set_xticklabels(ax.get_xticklabels(), rotation=30, ha="right") + pipeline_name = performances.performances[0].pipeline_name + fig.suptitle(f"{pipeline_name} performance across {set_} datasets") + 
_format_ax(ax, "{:.1%}", limits=(0, 1)) + fig.tight_layout() + self._mf.figure(fig, f"{pipeline_name}_{set_}.png") + + self._mf.paragraph(f"Inference time: {perf.mean_inference_time: .2e} s/img") + df = DataFrame(classification_report(perf.labels, perf.predictions, output_dict=True)) format_df_rows(df, ["precision", "recall", "f1-score"], "{:.1%}") format_df_row(df, "support", int) - mf.table(df) - mf.title("Confusion Matrix:", level=4) - mf.table(DataFrame(results.confusion_matrix, index=results.unique_labels, columns=results.unique_labels)) - mf.title("25 Mistakes examples", level=4) - mistakes_idx = np.random.choice(results.mistakes, min(len(results.mistakes), 25), replace=False) + self._mf.table(df) + + def _report_pipeline_set_confusion_matrix(self, perf: ClassificationPerformance): + self._mf.table( + DataFrame( + confusion_matrix(perf.labels, perf.predictions), index=perf.unique_labels, columns=perf.unique_labels + ) + ) + + def _report_pipeline_set_mistakes(self, perf: ClassificationPerformance): + mistakes = perf.mistakes + mistakes_idx = np.random.choice(mistakes, min(len(mistakes), 25), replace=False) relative_paths = [ - f", str(mf.markdown_path.parent))})" for idx in mistakes_idx + f", str(self._mf.markdown_path.parent))})" + for idx in mistakes_idx + ] + images_names = [ + f"[{perf.examples[idx].path.relative_to(DSET_DIR)}]" + f"({relpath(str(perf.examples[idx].path), str(self._mf.markdown_path.parent))})" + for idx in mistakes_idx ] - images_names = [image_paths[idx].relative_to(DSET_DIR) for idx in mistakes_idx] - mf.table( + self._mf.table( DataFrame( { "images": relative_paths, - "labels": map(str, results.labels[mistakes_idx]), - "predictions": map(str, results.predictions[mistakes_idx]), + "labels": perf.labels[mistakes_idx], + "predictions": perf.predictions[mistakes_idx], + **{ + f"p({str(label)})": map("{:.1%}".format, perf.proba[mistakes_idx, i]) + for i, label in enumerate(self.classes) + }, "image names": images_names, } ).set_index("images") ) - def _aggregate_results( - self, pipeline2results: Dict[str, ClassificationResults[EnumT]] - ) -> Tuple[Figure, Figure, DataFrame]: - sns.set_style() - sets = ["train", "test"] + def _make_aggregate_figures(self) -> Tuple[Figure, Figure]: df = DataFrame.from_records( [ { - "pipeline": pipeline_name, - str(self.main_metric): results.on_set(set_).report[self.main_metric[1]][self.main_metric[0]], - "inference time": results.on_set(set_).mean_inference_time, - "set": set_, + "dataset": perf.dataset_name, + "pipeline": perf.pipeline_name, + self.main_metric.name: self.main_metric(perf), + "time": perf.mean_inference_time, + "set": perf.set_.name.lower(), + "support": len(perf), } - for pipeline_name, results in pipeline2results.items() - # for metric in [self.main_metric] # + self.other_metrics - for set_ in sets + for perf in self._performances ] - ).sort_values(["set", str(self.main_metric)], ascending=[True, False]) + ).sort_values(["set", self.main_metric.name], ascending=[True, False]) + df[f"{self.main_metric.name} "] = list(zip(df[self.main_metric.name], df.support)) + df["time "] = list(zip(df[self.main_metric.name], df.support)) + + return ( + _cat_pipeline_results(df, f"{self.main_metric.name} ", "{:.1%}", limits=(0, 1)), + _cat_pipeline_results(df, "time ", "{:.2e}", log_scale=True), + ) + + def _make_aggregated_results_for_set(self, set_: Set) -> DataFrame: + pipeline2performances = self._performances.on_set(set_).group_by_pipeline() + pipeline2performance = { + pipeline_name: performances.merge() for pipeline_name, 
performances in pipeline2performances.items() + } return ( - _cat_pipeline_results(df, str(self.main_metric), "{:.1%}", limits=(0, 1)), - _cat_pipeline_results(df, "inference time", "{:.2e}", log_scale=True), - df.set_index("pipeline"), + DataFrame( + [ + { + "pipeline": pipeline_name, + self.main_metric.name: self.main_metric(performance), + "inference time": performance.mean_inference_time, + } + for pipeline_name, performance in pipeline2performance.items() + ] + ) + .set_index("pipeline") + .sort_values(self.main_metric.name, ascending=False) ) +def weighted_mean(x, **kws): + val, weight = map(np.asarray, zip(*x)) + return (val * weight).sum() / weight.sum() + + def _cat_pipeline_results( df: DataFrame, y: str, fmt: str, limits: Optional[Tuple[float, float]] = None, log_scale: bool = False ) -> Figure: @@ -208,6 +250,8 @@ def _cat_pipeline_results( legend=False, col_order=["test", "train"], height=10, + estimator=weighted_mean, + orient="v", ) grid.set_xticklabels(rotation=30, ha="right") diff --git a/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluator.py b/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluator.py index 2a23706..266de79 100644 --- a/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluator.py +++ b/robots-at-robots/research/robots_at_robots/evaluation/image_pipeline_evaluator.py @@ -1,111 +1,57 @@ -import logging -from dataclasses import dataclass from enum import Enum -from pathlib import Path +from itertools import chain from time import time -from typing import Dict, Generic, Iterable, List, Sequence, Tuple +from typing import Generic, Iterable, List import numpy as np -from memoized_property import memoized_property -from sklearn.metrics import classification_report, confusion_matrix -from tqdm import tqdm -from polystar.common.models.image import Image, load_images -from polystar.common.pipeline.pipeline import Pipeline +from polystar.common.models.image import file_images_to_images +from polystar.common.pipeline.classification.classification_pipeline import ClassificationPipeline +from polystar.common.utils.iterable_utils import flatten +from research.common.datasets.image_dataset import FileImageDataset from research.common.datasets.lazy_dataset import TargetT -from research.common.datasets.roco.roco_dataset_builder import ROCODatasetBuilder -from research.common.datasets.union_dataset import UnionDataset -from research.robots_at_robots.dataset.armor_value_dataset_generator import ArmorValueDatasetGenerator +from research.robots_at_robots.evaluation.performance import ( + ClassificationPerformance, + ClassificationPerformances, + ContextualizedClassificationPerformance, +) +from research.robots_at_robots.evaluation.set import Set -@dataclass -class SetClassificationResults(Generic[TargetT]): - labels: np.ndarray - predictions: np.ndarray - mean_inference_time: float - - @property - def report(self) -> Dict: - return classification_report(self.labels, self.predictions, output_dict=True) - - @property - def confusion_matrix(self) -> Dict: - return confusion_matrix(self.labels, self.predictions) - - @property - def mistakes(self) -> Sequence[int]: - return np.where(self.labels != self.predictions)[0] - - @memoized_property - def unique_labels(self) -> List[TargetT]: - return sorted(set(self.labels) | set(self.predictions)) - - -@dataclass -class ClassificationResults(Generic[TargetT]): - train_results: SetClassificationResults[TargetT] - test_results: SetClassificationResults[TargetT] - 
full_pipeline_name: str - - def on_set(self, set_: str) -> SetClassificationResults[TargetT]: - if set_ is "train": - return self.train_results - return self.test_results - - -class ImagePipelineEvaluator(Generic[TargetT]): +class ImageClassificationPipelineEvaluator(Generic[TargetT]): def __init__( - self, - train_roco_datasets: List[ROCODatasetBuilder], - test_roco_datasets: List[ROCODatasetBuilder], - image_dataset_generator: ArmorValueDatasetGenerator[TargetT], + self, train_datasets: List[FileImageDataset], test_datasets: List[FileImageDataset], ): - logging.info("Loading data") - self.train_roco_datasets = train_roco_datasets - self.test_roco_datasets = test_roco_datasets - (self.train_images_paths, self.train_images, self.train_labels, self.train_dataset_sizes) = load_datasets( - train_roco_datasets, image_dataset_generator - ) - (self.test_images_paths, self.test_images, self.test_labels, self.test_dataset_sizes) = load_datasets( - test_roco_datasets, image_dataset_generator - ) - - def evaluate_pipelines(self, pipelines: Iterable[Pipeline]) -> Dict[str, ClassificationResults]: - tqdm_pipelines = tqdm(pipelines, desc="Training", unit="pipeline") - return {str(pipeline): self.evaluate_pipeline(pipeline, tqdm_pipelines) for pipeline in tqdm_pipelines} + self.train_datasets = train_datasets + self.test_datasets = test_datasets - def evaluate_pipeline(self, pipeline: Pipeline, tqdm_pipelines: tqdm) -> ClassificationResults: - tqdm_pipelines.set_postfix({"pipeline": pipeline.name}, True) - pipeline.fit(self.train_images, self.train_labels) + def evaluate_pipelines(self, pipelines: Iterable[ClassificationPipeline]) -> ClassificationPerformances: + return ClassificationPerformances(flatten(self._evaluate_pipeline(pipeline) for pipeline in pipelines)) - train_results = self._evaluate_pipeline_on_set(pipeline, self.train_images, self.train_labels) - test_results = self._evaluate_pipeline_on_set(pipeline, self.test_images, self.test_labels) - - return ClassificationResults( - train_results=train_results, test_results=test_results, full_pipeline_name=repr(pipeline), + def _evaluate_pipeline(self, pipeline: ClassificationPipeline) -> Iterable[ContextualizedClassificationPerformance]: + return chain( + self._evaluate_pipeline_on_set(pipeline, self.train_datasets, Set.TRAIN), + self._evaluate_pipeline_on_set(pipeline, self.test_datasets, Set.TEST), ) @staticmethod def _evaluate_pipeline_on_set( - pipeline: Pipeline, images: List[Image], labels: List[TargetT] - ) -> SetClassificationResults: - t = time() - preds = pipeline.predict(images) - mean_time = (time() - t) / len(images) - return SetClassificationResults(_labels_to_numpy(labels), _labels_to_numpy(preds), mean_time) - - -def load_datasets( - roco_datasets: List[ROCODatasetBuilder], image_dataset_generator: ArmorValueDatasetGenerator[TargetT], -) -> Tuple[List[Path], List[Image], List[TargetT], List[int]]: - # TODO we should receive a list of FileImageDataset - datasets = [builder.build() for builder in image_dataset_generator.from_roco_datasets(roco_datasets)] - dataset_sizes = [len(d) for d in datasets] - - dataset = UnionDataset(datasets) - paths, targets = list(dataset.examples), list(dataset.targets) - images = list(load_images(paths)) - return paths, images, targets, dataset_sizes + pipeline: ClassificationPipeline, datasets: List[FileImageDataset], set_: Set + ) -> Iterable[ContextualizedClassificationPerformance]: + for dataset in datasets: + t = time() + proba, classes = 
pipeline.predict_proba_and_classes(file_images_to_images(dataset.examples)) + mean_time = (time() - t) / len(dataset) + yield ContextualizedClassificationPerformance( + examples=dataset.examples, + labels=_labels_to_numpy(dataset.targets), + predictions=_labels_to_numpy(classes), + proba=proba, + mean_inference_time=mean_time, + set_=set_, + dataset_name=dataset.name, + pipeline_name=pipeline.name, + ) def _labels_to_numpy(labels: List[Enum]) -> np.ndarray: diff --git a/robots-at-robots/research/robots_at_robots/evaluation/metrics/__init__.py b/robots-at-robots/research/robots_at_robots/evaluation/metrics/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/robots-at-robots/research/robots_at_robots/evaluation/metrics/accuracy.py b/robots-at-robots/research/robots_at_robots/evaluation/metrics/accuracy.py new file mode 100644 index 0000000..ccfe9c7 --- /dev/null +++ b/robots-at-robots/research/robots_at_robots/evaluation/metrics/accuracy.py @@ -0,0 +1,11 @@ +from research.robots_at_robots.evaluation.metrics.metric_abc import MetricABC +from research.robots_at_robots.evaluation.performance import ClassificationPerformance + + +class AccuracyMetric(MetricABC): + def __call__(self, performance: ClassificationPerformance) -> float: + return (performance.labels == performance.predictions).mean() + + @property + def name(self) -> str: + return "accuracy" diff --git a/robots-at-robots/research/robots_at_robots/evaluation/metrics/f1.py b/robots-at-robots/research/robots_at_robots/evaluation/metrics/f1.py new file mode 100644 index 0000000..dd5f48a --- /dev/null +++ b/robots-at-robots/research/robots_at_robots/evaluation/metrics/f1.py @@ -0,0 +1,30 @@ +from enum import Enum, auto + +from sklearn.metrics import f1_score + +from research.robots_at_robots.evaluation.metrics.metric_abc import MetricABC +from research.robots_at_robots.evaluation.performance import ClassificationPerformance + + +class F1Strategy(Enum): + MICRO = auto() + MACRO = auto() + SAMPLES = auto() + WEIGHTED = auto() + + def __repr__(self): + return self.name.lower() + + __str__ = __repr__ + + +class F1Metric(MetricABC): + def __init__(self, strategy: F1Strategy = F1Strategy.MACRO): + self.strategy = strategy + + def __call__(self, performance: ClassificationPerformance) -> float: + return f1_score(performance.labels, performance.predictions, average=str(self.strategy)) + + @property + def name(self) -> str: + return f"f1 {self.strategy}" diff --git a/robots-at-robots/research/robots_at_robots/evaluation/metrics/metric_abc.py b/robots-at-robots/research/robots_at_robots/evaluation/metrics/metric_abc.py new file mode 100644 index 0000000..f25a0c3 --- /dev/null +++ b/robots-at-robots/research/robots_at_robots/evaluation/metrics/metric_abc.py @@ -0,0 +1,17 @@ +from abc import ABC, abstractmethod + +from research.robots_at_robots.evaluation.performance import ClassificationPerformance + + +class MetricABC(ABC): + @abstractmethod + def __call__(self, performance: ClassificationPerformance) -> float: + pass + + @property + @abstractmethod + def name(self) -> str: + pass + + def __repr__(self): + return self.name diff --git a/robots-at-robots/research/robots_at_robots/evaluation/performance.py b/robots-at-robots/research/robots_at_robots/evaluation/performance.py new file mode 100644 index 0000000..33c0bc7 --- /dev/null +++ b/robots-at-robots/research/robots_at_robots/evaluation/performance.py @@ -0,0 +1,79 @@ +from dataclasses import dataclass +from typing import Dict, Iterable, List, Sequence + +import numpy as np +from 
+from memoized_property import memoized_property
+
+from polystar.common.filters.filter_abc import FilterABC
+from polystar.common.models.image import FileImage
+from polystar.common.utils.iterable_utils import flatten, group_by
+from research.robots_at_robots.evaluation.set import Set
+
+
+@dataclass
+class ClassificationPerformance:
+    examples: List[FileImage]
+    labels: np.ndarray
+    predictions: np.ndarray
+    proba: np.ndarray
+    mean_inference_time: float
+
+    @property
+    def mistakes(self) -> Sequence[int]:
+        return np.where(self.labels != self.predictions)[0]
+
+    @memoized_property
+    def unique_labels(self):
+        return sorted(set(self.labels) | set(self.predictions))
+
+    def __len__(self) -> int:
+        return len(self.labels)
+
+
+@dataclass
+class ContextualizedClassificationPerformance(ClassificationPerformance):
+    set_: Set
+    dataset_name: str
+    pipeline_name: str
+
+
+@dataclass
+class ClassificationPerformances(Iterable[ContextualizedClassificationPerformance]):
+    performances: List[ContextualizedClassificationPerformance]
+
+    @property
+    def train(self) -> "ClassificationPerformances":
+        return self.on_set(Set.TRAIN)
+
+    @property
+    def test(self) -> "ClassificationPerformances":
+        return self.on_set(Set.TEST)
+
+    def on_set(self, set_: Set) -> "ClassificationPerformances":
+        return ClassificationPerformances(SetClassificationPerformanceFilter(set_).filter(self.performances))
+
+    def group_by_pipeline(self) -> Dict[str, "ClassificationPerformances"]:
+        return {
+            name: ClassificationPerformances(performances)
+            for name, performances in group_by(self, lambda p: p.pipeline_name).items()
+        }
+
+    def merge(self) -> ClassificationPerformance:
+        return ClassificationPerformance(
+            examples=flatten(p.examples for p in self),
+            labels=np.concatenate([p.labels for p in self]),
+            predictions=np.concatenate([p.predictions for p in self]),
+            proba=np.concatenate([p.proba for p in self]),
+            mean_inference_time=np.average([p.mean_inference_time for p in self], weights=[len(p) for p in self]),
+        )
+
+    def __iter__(self):
+        return iter(self.performances)
+
+
+@dataclass
+class SetClassificationPerformanceFilter(FilterABC[ContextualizedClassificationPerformance]):
+    set_: Set
+
+    def validate_single(self, perf: ContextualizedClassificationPerformance) -> bool:
+        return perf.set_ is self.set_
diff --git a/robots-at-robots/research/robots_at_robots/evaluation/set.py b/robots-at-robots/research/robots_at_robots/evaluation/set.py
new file mode 100644
index 0000000..6175a68
--- /dev/null
+++ b/robots-at-robots/research/robots_at_robots/evaluation/set.py
@@ -0,0 +1,12 @@
+from enum import Enum, auto
+
+
+class Set(Enum):
+    TRAIN = auto()
+    VALIDATION = auto()
+    TEST = auto()
+
+    def __repr__(self):
+        return self.name.lower()
+
+    __str__ = __repr__
diff --git a/robots-at-robots/research/robots_at_robots/evaluation/trainer.py b/robots-at-robots/research/robots_at_robots/evaluation/trainer.py
new file mode 100644
index 0000000..6731cd0
--- /dev/null
+++ b/robots-at-robots/research/robots_at_robots/evaluation/trainer.py
@@ -0,0 +1,25 @@
+from typing import Generic, List
+
+from tqdm import tqdm
+
+from polystar.common.models.image import file_images_to_images
+from polystar.common.pipeline.classification.classification_pipeline import ClassificationPipeline
+from research.common.datasets.image_dataset import FileImageDataset
+from research.common.datasets.lazy_dataset import TargetT
+from research.common.datasets.union_dataset import UnionDataset
+
+
+class ImageClassificationPipelineTrainer(Generic[TargetT]):
+    def __init__(self, training_datasets: List[FileImageDataset]):
+        train_dataset = UnionDataset(training_datasets)
+        self.images = file_images_to_images(train_dataset.examples)
+        self.labels = train_dataset.targets
+
+    def train_pipeline(self, pipeline: ClassificationPipeline):
+        pipeline.fit(self.images, self.labels)
+
+    def train_pipelines(self, pipelines: List[ClassificationPipeline]):
+        tqdm_pipelines = tqdm(pipelines, desc="Training Pipelines")
+        for pipeline in tqdm_pipelines:
+            tqdm_pipelines.set_postfix({"pipeline": pipeline.name}, refresh=True)
+            self.train_pipeline(pipeline)
--
GitLab
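
Usage note (not part of the patch): the sketch below shows one way the pieces introduced here could be wired together. It is an untested illustration; the names train_datasets (a list of FileImageDataset), pipelines (a list of ClassificationPipeline) and performances (a list of ContextualizedClassificationPerformance produced by the evaluator) are placeholders assumed to be built elsewhere in the project.

    from research.robots_at_robots.evaluation.metrics.accuracy import AccuracyMetric
    from research.robots_at_robots.evaluation.metrics.f1 import F1Metric, F1Strategy
    from research.robots_at_robots.evaluation.performance import ClassificationPerformances
    from research.robots_at_robots.evaluation.trainer import ImageClassificationPipelineTrainer

    # Fit every candidate pipeline on the union of the training datasets.
    # train_datasets and pipelines are hypothetical inputs, see note above.
    trainer = ImageClassificationPipelineTrainer(train_datasets)
    trainer.train_pipelines(pipelines)

    # Wrap the per-(pipeline, dataset, set) results produced by the evaluator
    # and score each pipeline on its merged test-set performance.
    all_performances = ClassificationPerformances(performances)
    metrics = [AccuracyMetric(), F1Metric(F1Strategy.MACRO)]
    for name, pipeline_perfs in all_performances.group_by_pipeline().items():
        test_perf = pipeline_perfs.test.merge()
        print(name, {metric.name: metric(test_perf) for metric in metrics})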