dataset_88kps+some-updates
This commit is contained in:
16
Karussell/Training/openpifpaf_karusell/__init__.py
Normal file
16
Karussell/Training/openpifpaf_karusell/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
||||
import openpifpaf
|
||||
|
||||
from . import karusel_kp
|
||||
|
||||
|
||||
def register():
    """Register the karusel datamodule and pretrained checkpoint URLs with openpifpaf."""
    openpifpaf.DATAMODULES['karusel'] = karusel_kp.karusel_Kp
    # Checkpoints are hosted on the openpifpaf_assets release page.
    release = "http://github.com/DuncanZauss/openpifpaf_assets/releases/"
    openpifpaf.CHECKPOINT_URLS['shufflenetv2k16-apollo-24'] = (
        release + "download/v0.1.0/shufflenetv2k16-201113-135121-apollo.pkl.epoch290")
    openpifpaf.CHECKPOINT_URLS['shufflenetv2k16-apollo-66'] = (
        release + "download/v0.1.0/sk16_apollo_66kp.pkl")
    openpifpaf.CHECKPOINT_URLS['shufflenetv2k30-apollo-66'] = (
        release + "download/v0.1.0/sk30_apollo_66kp.pkl")
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
417
Karussell/Training/openpifpaf_karusell/constants.py
Normal file
417
Karussell/Training/openpifpaf_karusell/constants.py
Normal file
@@ -0,0 +1,417 @@
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
try:
|
||||
import matplotlib.cm as mplcm
|
||||
from matplotlib.animation import FuncAnimation
|
||||
from mpl_toolkits.mplot3d import Axes3D
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
import openpifpaf
|
||||
|
||||
# Keypoint "names" for the karusel dataset: the plain integers 1..88.
# NOTE(review): the `_24` suffix is inherited from the Apollo 24-kp plugin,
# but this list actually holds 88 entries — confirm the naming is intended.
CAR_KEYPOINTS_24 = list(range(1, 89))
|
||||
|
||||
CAR_SKELETON_24 = [[ 1, 2],
|
||||
[ 2, 3],
|
||||
[ 3, 4],
|
||||
[ 4, 5],
|
||||
[ 5, 6],
|
||||
[ 6, 7],
|
||||
[ 7, 8],
|
||||
[ 8, 9],
|
||||
[ 9, 10],
|
||||
[10, 11],
|
||||
[11, 12],
|
||||
[12, 13],
|
||||
[13, 14],
|
||||
[14, 15],
|
||||
[15, 16],
|
||||
[16, 17],
|
||||
[17, 18],
|
||||
[18, 19],
|
||||
[19, 20],
|
||||
[20, 21],
|
||||
[21, 22],
|
||||
[22, 23],
|
||||
[25, 26],
|
||||
[23, 24],
|
||||
[24, 25],
|
||||
[26, 27],
|
||||
[27, 28],
|
||||
[28, 29],
|
||||
[29, 30],
|
||||
[30, 31],
|
||||
[31, 32],
|
||||
[32, 33],
|
||||
[33, 34],
|
||||
[34, 35],
|
||||
[35, 36],
|
||||
[36, 37],
|
||||
[37, 38],
|
||||
[38, 39],
|
||||
[39, 40],
|
||||
[40, 41],
|
||||
[41, 42],
|
||||
[42, 43],
|
||||
[43, 44],
|
||||
[44, 45],
|
||||
[45, 46],
|
||||
[46, 47],
|
||||
[47, 48],
|
||||
[48, 49],
|
||||
[49, 50],
|
||||
[50, 51],
|
||||
[51, 52],
|
||||
[52, 53],
|
||||
[53, 54],
|
||||
[54, 55],
|
||||
[55, 56],
|
||||
[56, 57],
|
||||
[57, 1],
|
||||
[57, 58],
|
||||
[38, 58],
|
||||
[58, 59],
|
||||
[59, 60],
|
||||
[60, 61],
|
||||
[61, 62],
|
||||
[62, 63],
|
||||
[63, 64],
|
||||
[64, 65],
|
||||
[65, 66],
|
||||
[66, 67],
|
||||
[67, 68],
|
||||
[69, 70],
|
||||
[70, 71],
|
||||
[71, 72],
|
||||
[72, 73],
|
||||
[73, 74],
|
||||
[74, 75],
|
||||
[75, 76],
|
||||
[76, 77],
|
||||
[77, 78],
|
||||
[59, 69],
|
||||
[78, 68],
|
||||
[79, 80],
|
||||
[80, 81],
|
||||
[81, 82],
|
||||
[82, 83],
|
||||
[83, 84],
|
||||
[84, 85],
|
||||
[85, 86],
|
||||
[86, 87],
|
||||
[87, 88],
|
||||
[58, 79],
|
||||
[88, 68],
|
||||
[69, 1],
|
||||
[37, 79]]
|
||||
|
||||
# Single object category produced by this datamodule.
CAR_CATEGORIES_24 = ['karusel']

# Uniform per-keypoint score weights used when ranking predicted instances.
CAR_SCORE_WEIGHTS_24 = [1.0]*len(CAR_KEYPOINTS_24)

# Uniform per-keypoint OKS sigmas; scaled by instance size during evaluation.
CAR_SIGMAS_24 = [0.05] * len(CAR_KEYPOINTS_24)
|
||||
|
||||
CAR_POSE_24 = np.array([[-177.26242219, -152.48617537, 0. ],
|
||||
[-184.66727021, -125.83719367, 0. ],
|
||||
[-193.12308373, -98.24191839, 0. ],
|
||||
[-202.42051892, -73.69486768, 0. ],
|
||||
[-212.03196984, -43.15603975, 0. ],
|
||||
[-216.33625735, -18.34730925, 0. ],
|
||||
[-216.80303459, 9.66665368, 0. ],
|
||||
[-213.95990749, 31.84584727, 0. ],
|
||||
[-210.22282276, 56.07463589, 0. ],
|
||||
[-200.59863274, 82.61469931, 0. ],
|
||||
[-188.82017578, 106.26354604, 0. ],
|
||||
[-175.04445976, 130.01706468, 0. ],
|
||||
[-157.11721772, 150.98403855, 0. ],
|
||||
[-137.03570875, 169.05979572, 0. ],
|
||||
[-116.84952786, 185.13829382, 0. ],
|
||||
[ -97.40029673, 196.17130829, 0. ],
|
||||
[ -72.90558197, 206.46737301, 0. ],
|
||||
[ -47.2552298 , 213.81988508, 0. ],
|
||||
[ -11.46157442, 218.69986811, 0. ],
|
||||
[ 29.74391629, 215.85249464, 0. ],
|
||||
[ 60.12148998, 209.43353705, 0. ],
|
||||
[ 85.61058791, 200.75564061, 0. ],
|
||||
[ 106.21121008, 189.81880532, 0. ],
|
||||
[ 125.97021059, 175.83374547, 0. ],
|
||||
[ 145.833883 , 159.85142654, 0. ],
|
||||
[ 163.85730422, 140.76854711, 0. ],
|
||||
[ 181.03910377, 118.63744311, 0. ],
|
||||
[ 194.27872114, 95.29836575, 0. ],
|
||||
[ 202.57752678, 70.69897907, 0. ],
|
||||
[ 210.87633243, 46.0995924 , 0. ],
|
||||
[ 214.33899828, 18.24263734, 0. ],
|
||||
[ 215.54272527, -4.72584196, 0. ],
|
||||
[ 212.908942 , -30.89955369, 0. ],
|
||||
[ 207.22693416, -56.23164375, 0. ],
|
||||
[ 197.60274414, -82.77170718, 0. ],
|
||||
[ 186.92758863, -108.36547702, 0. ],
|
||||
[ 173.36121642, -136.1135138 , 0. ],
|
||||
[ 92.94749408, -130.31409495, 0. ],
|
||||
[ 89.74650802, -107.45028757, 0. ],
|
||||
[ 84.54826289, -84.69115209, 0. ],
|
||||
[ 78.35138822, -61.98435258, 0. ],
|
||||
[ 69.15862495, -39.43456093, 0. ],
|
||||
[ 58.96723214, -16.93710523, 0. ],
|
||||
[ 46.83091622, 4.45704901, 0. ],
|
||||
[ 27.91353738, 21.49033343, 0. ],
|
||||
[ 3.42306899, 30.22056582, 0. ],
|
||||
[ -18.38977291, 26.07328618, 0. ],
|
||||
[ -27.68720809, 50.6203369 , 0. ],
|
||||
[ -39.67500888, 30.96600831, 0. ],
|
||||
[ -52.66143919, 11.25934376, 0. ],
|
||||
[ -64.59690402, -9.39361436, 0. ],
|
||||
[ -75.53373931, -29.99423653, 0. ],
|
||||
[ -87.46920414, -50.64719466, 0. ],
|
||||
[ -96.30410845, -73.14040399, 0. ],
|
||||
[-104.14038323, -95.58127736, 0. ],
|
||||
[-110.97802847, -117.96981477, 0. ],
|
||||
[-115.81841464, -140.25368027, 0. ],
|
||||
[ -9.87599751, -212.80851645, 0. ],
|
||||
[-143.38258579, -244.83960891, 0. ],
|
||||
[-138.76003618, -256.6138195 , 0. ],
|
||||
[-135.03144419, -270.43762512, 0. ],
|
||||
[-130.30422266, -284.20909478, 0. ],
|
||||
[-127.52192425, -299.08386589, 0. ],
|
||||
[-123.79333226, -312.90767151, 0. ],
|
||||
[-119.11844669, -325.68051164, 0. ],
|
||||
[-116.38848423, -339.55665321, 0. ],
|
||||
[-112.55522032, -355.3777179 , 0. ],
|
||||
[-109.87759382, -368.25522994, 0. ],
|
||||
[-195.84742026, -294.65367464, 0. ],
|
||||
[-192.11882827, -308.47748026, 0. ],
|
||||
[-188.44257223, -321.30265634, 0. ],
|
||||
[-184.71398024, -335.12646196, 0. ],
|
||||
[-182.0886897 , -347.00534446, 0. ],
|
||||
[-179.4110632 , -359.8828565 , 0. ],
|
||||
[-175.73480716, -372.70803258, 0. ],
|
||||
[-173.10951662, -384.58691509, 0. ],
|
||||
[-170.43189012, -397.46442713, 0. ],
|
||||
[-167.75426362, -410.34193917, 0. ],
|
||||
[ 192.78072299, -258.26457915, 0. ],
|
||||
[ 188.46794275, -271.50844288, 0. ],
|
||||
[ 182.05323153, -282.85971946, 0. ],
|
||||
[ 177.74045128, -296.10358319, 0. ],
|
||||
[ 172.42904151, -309.39978288, 0. ],
|
||||
[ 168.11626126, -322.64364661, 0. ],
|
||||
[ 163.80348102, -335.88751034, 0. ],
|
||||
[ 159.49070078, -349.13137408, 0. ],
|
||||
[ 155.17792054, -362.37523781, 0. ],
|
||||
[ 151.91610578, -376.56539512, 0. ]])
|
||||
|
||||
# Horizontal-flip correspondences: keypoint name -> mirrored keypoint name.
# Every listed keypoint maps to itself, i.e. flipping swaps no left/right pairs.
# NOTE(review): only keypoints '1'..'22' are listed although the skeleton uses
# 88 keypoints — confirm the remaining keypoints intentionally have no entry.
HFLIP_24 = {
    '1': '1',
    '2': '2',
    '3': '3',
    '4': '4',
    '5': '5',
    '6': '6',
    '7': '7',
    '8': '8',
    '9': '9',
    '10': '10',
    '11': '11',
    '12': '12',
    '13': '13',
    '14': '14',
    '15': '15',
    '16': '16',
    '17': '17',
    '18': '18',
    '19': '19',
    '20': '20',
    '21': '21',
    '22': '22'

}
|
||||
|
||||
training_weights_local_centrality = [
|
||||
0.890968488270775,
|
||||
0.716506138617812,
|
||||
1.05674590410869,
|
||||
0.764774195768455,
|
||||
0.637682585483328,
|
||||
0.686680807728366,
|
||||
0.955422595797394,
|
||||
0.936714585642375,
|
||||
1.34823795445326,
|
||||
1.38308992581967,
|
||||
1.32689945125819,
|
||||
1.38838655605483,
|
||||
1.18980184904613,
|
||||
1.02584355494795,
|
||||
0.90969156732068,
|
||||
1.24732068576104,
|
||||
1.11338768064342,
|
||||
0.933815217550391,
|
||||
0.852297518872114,
|
||||
1.04167641424727,
|
||||
1.01668968075247,
|
||||
1.34625964088011,
|
||||
0.911796331039028,
|
||||
0.866206536337413,
|
||||
1.55957820407853,
|
||||
0.730844382675724,
|
||||
0.651138644197359,
|
||||
0.758018559633786,
|
||||
1.31842501396691,
|
||||
1.32186116654782,
|
||||
0.744347016851606,
|
||||
0.636390683664723,
|
||||
0.715244950821949,
|
||||
1.63122349407032,
|
||||
0.849835699185461,
|
||||
0.910488007220499,
|
||||
1.44244151650561,
|
||||
1.14150437331681,
|
||||
1.19808610191343,
|
||||
0.960186788642886,
|
||||
1.05023623286937,
|
||||
1.19761709710598,
|
||||
1.3872216313401,
|
||||
1.01256700741214,
|
||||
1.1167909667759,
|
||||
1.27893496336199,
|
||||
1.54475684725655,
|
||||
1.40343733870633,
|
||||
1.45552060866114,
|
||||
1.47264222155031,
|
||||
0.970060423999993,
|
||||
0.944450314768933,
|
||||
0.623987071240172,
|
||||
0.5745237907704,
|
||||
0.66890646050993,
|
||||
0.978411632994504,
|
||||
0.587396395188292,
|
||||
0.76307999741129,
|
||||
0.609793563449648,
|
||||
0.67983566494545,
|
||||
0.685883538168462,
|
||||
0.753587600664775,
|
||||
0.770335133588157,
|
||||
0.764713638033368,
|
||||
0.792364155965385,
|
||||
0.796435233566833
|
||||
]
|
||||
|
||||
|
||||
def get_constants(num_kps):
    """Return the dataset constants for a pose with ``num_kps`` keypoints.

    Returns a list of
    ``[keypoints, skeleton, hflip, sigmas, pose, categories, score_weights]``.

    Raises:
        Exception: if ``num_kps`` is unsupported, or if the requested constant
            set is not defined in this module.
    """
    if num_kps == 24:
        # Mark every keypoint of the canonical pose as "visible" (v=2).
        CAR_POSE_24[:, 2] = 2.0
        return [CAR_KEYPOINTS_24, CAR_SKELETON_24, HFLIP_24, CAR_SIGMAS_24,
                CAR_POSE_24, CAR_CATEGORIES_24, CAR_SCORE_WEIGHTS_24]
    if num_kps == 66:
        # Bug fix: the 66-kp constants (CAR_POSE_66, ...) are not defined in
        # this module, so the original code crashed with a confusing
        # NameError here. Fail with an explicit message instead (and keep
        # working if those constants are ever added).
        if 'CAR_POSE_66' not in globals():
            raise Exception("66-keypoint constants are not defined in this module.")
        CAR_POSE_66[:, 2] = 2.0
        return [CAR_KEYPOINTS_66, CAR_SKELETON_66, HFLIP_66, CAR_SIGMAS_66,
                CAR_POSE_66, CAR_CATEGORIES_66, CAR_SCORE_WEIGHTS_66]
    # using no if-elif-else construction due to pylint no-else-return error
    raise Exception("Only poses with 24 or 66 keypoints are available.")
|
||||
|
||||
|
||||
def draw_ann(ann, *, keypoint_painter, filename=None, margin=0.5, aspect=None, **kwargs):
    """Render a single annotation with the given painter.

    Writes to ``filename`` when given, otherwise shows the canvas. The axis
    limits are the annotation's bounding box expanded by ``margin``.
    """
    from openpifpaf import show  # pylint: disable=import-outside-toplevel

    box = ann.bbox()
    xlim = box[0] - margin, box[0] + box[2] + margin
    ylim = box[1] - margin, box[1] + box[3] + margin

    # With a fixed aspect the width is fixed; otherwise keep the bbox ratio.
    if aspect == 'equal':
        fig_w = 5.0
    else:
        fig_w = 5.0 / (ylim[1] - ylim[0]) * (xlim[1] - xlim[0])

    with show.canvas(filename, figsize=(fig_w, 5), nomargin=True, **kwargs) as ax:
        ax.set_axis_off()
        ax.set_xlim(*xlim)
        ax.set_ylim(*ylim)
        if aspect is not None:
            ax.set_aspect(aspect)
        keypoint_painter.annotation(ax, ann)
|
||||
|
||||
|
||||
def draw_skeletons(pose, sigmas, skel, kps, scr_weights):
    """Draw the canonical skeleton pose and save it to docs/skeleton_car.png."""
    from openpifpaf.annotation import Annotation  # pylint: disable=import-outside-toplevel
    from openpifpaf import show  # pylint: disable=import-outside-toplevel

    # Overall pose scale: geometric mean of the x and y extents.
    extent_x = np.max(pose[:, 0]) - np.min(pose[:, 0])
    extent_y = np.max(pose[:, 1]) - np.min(pose[:, 1])
    scale = np.sqrt(extent_x * extent_y)

    show.KeypointPainter.show_joint_scales = True
    painter = show.KeypointPainter()

    ann = Annotation(keypoints=kps, skeleton=skel, score_weights=scr_weights)
    ann.set(pose, np.array(sigmas) * scale)

    os.makedirs('docs', exist_ok=True)
    draw_ann(ann, filename='docs/skeleton_car.png', keypoint_painter=painter)
|
||||
|
||||
|
||||
def plot3d_red(ax_2D, p3d, skeleton):
    """Plot the 3-D pose ``p3d`` with its ``skeleton`` bones on a new 3-D axis.

    Returns a matplotlib ``FuncAnimation`` that rotates the camera a full
    360 degrees around the pose. Relies on matplotlib / mpl_toolkits being
    importable (they are imported at module level inside a try/except).
    """
    # The skeleton uses 1-based keypoint indices; convert to 0-based.
    skeleton = [(bone[0] - 1, bone[1] - 1) for bone in skeleton]

    # Swap the y and z axes (a +90 deg rotation about x) so the pose is upright.
    rot_p90_x = np.array([[1, 0, 0], [0, 0, 1], [0, 1, 0]])
    p3d = p3d @ rot_p90_x

    fig = ax_2D.get_figure()
    ax = Axes3D(fig, auto_add_to_figure=False)
    fig.add_axes(ax)
    ax.set_axis_off()
    ax_2D.set_axis_off()

    ax.view_init(azim=-90, elev=20)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    # Fit a cube around the pose so all three axes share the same scale.
    max_range = np.array([p3d[:, 0].max() - p3d[:, 0].min(),
                          p3d[:, 1].max() - p3d[:, 1].min(),
                          p3d[:, 2].max() - p3d[:, 2].min()]).max() / 2.0
    mid_x = (p3d[:, 0].max() + p3d[:, 0].min()) * 0.5
    mid_y = (p3d[:, 1].max() + p3d[:, 1].min()) * 0.5
    mid_z = (p3d[:, 2].max() + p3d[:, 2].min()) * 0.5

    ax.set_xlim(mid_x - max_range, mid_x + max_range)
    ax.set_ylim(mid_y - max_range, mid_y + max_range)
    ax.set_zlim(mid_z - max_range, mid_z + max_range)  # pylint: disable=no-member

    for ci, bone in enumerate(skeleton):
        c = mplcm.get_cmap('tab20')((ci % 20 + 0.05) / 20)  # Same coloring as Pifpaf preds
        ax.plot(p3d[bone, 0], p3d[bone, 1], p3d[bone, 2], color=c)

    def animate(i):
        # One frame per azimuth degree; closes over fig/ax above.
        ax.view_init(elev=10., azim=i)
        return fig

    return FuncAnimation(fig, animate, frames=360, interval=100)
|
||||
|
||||
|
||||
def print_associations():
|
||||
print("\nAssociations of the car skeleton with 24 keypoints")
|
||||
for j1, j2 in CAR_SKELETON_24:
|
||||
print(CAR_KEYPOINTS_24[j1 - 1], '-', CAR_KEYPOINTS_24[j2 - 1])
|
||||
print("\nAssociations of the car skeleton with 66 keypoints")
|
||||
for j1, j2 in CAR_SKELETON_66:
|
||||
print(CAR_KEYPOINTS_66[j1 - 1], '-', CAR_KEYPOINTS_66[j2 - 1])
|
||||
|
||||
|
||||
def main():
    """Render a rotating GIF of the canonical 24(/88)-keypoint pose.

    The commented-out calls below were used to render the static skeleton
    images and the 66-kp animation; note they reference 66-kp constants
    that are not defined in this module.
    """
    # print_associations()
    # =============================================================================
    # draw_skeletons(CAR_POSE_24, sigmas = CAR_SIGMAS_24, skel = CAR_SKELETON_24,
    # kps = CAR_KEYPOINTS_24, scr_weights = CAR_SCORE_WEIGHTS_24)
    # draw_skeletons(CAR_POSE_66, sigmas = CAR_SIGMAS_66, skel = CAR_SKELETON_66,
    # kps = CAR_KEYPOINTS_66, scr_weights = CAR_SCORE_WEIGHTS_66)
    # =============================================================================
    # with openpifpaf.show.Canvas.blank(nomargin=True) as ax_2D:
    # anim_66 = plot3d_red(ax_2D, CAR_POSE_66, CAR_SKELETON_66)
    # anim_66.save('./CAR_66_Pose.gif', fps=30)
    with openpifpaf.show.Canvas.blank(nomargin=True) as ax_2D:
        anim_24 = plot3d_red(ax_2D, CAR_POSE_24, CAR_SKELETON_24)
        anim_24.save('./CAR_24_Pose.gif', fps=30)
|
||||
332
Karussell/Training/openpifpaf_karusell/karusel_kp.py
Normal file
332
Karussell/Training/openpifpaf_karusell/karusel_kp.py
Normal file
@@ -0,0 +1,332 @@
|
||||
"""
|
||||
Interface for custom data.
|
||||
|
||||
This module handles datasets and is the class that you need to inherit from for your custom dataset.
|
||||
This class gives you all the handles so that you can train with a new --dataset=mydataset.
|
||||
The particular configuration of keypoints and skeleton is specified in the headmeta instances
|
||||
"""
|
||||
|
||||
|
||||
import argparse
|
||||
import torch
|
||||
import numpy as np
|
||||
try:
|
||||
from pycocotools.coco import COCO
|
||||
except ImportError:
|
||||
COCO = None
|
||||
|
||||
from openpifpaf.datasets import DataModule
|
||||
from openpifpaf import encoder, headmeta, metric, transforms
|
||||
from openpifpaf.datasets import collate_images_anns_meta, collate_images_targets_meta
|
||||
from openpifpaf.plugins.coco import CocoDataset as CocoLoader
|
||||
|
||||
from .constants import get_constants, training_weights_local_centrality
|
||||
from .metrics import MeanPixelError
|
||||
|
||||
|
||||
class karusel_Kp(DataModule):
    """
    DataModule for the karusel Dataset.

    Wires the karusel keypoint dataset into openpifpaf: builds the CIF/CAF
    head metas, registers the --karusel-* CLI options, and provides the
    train/val/eval data loaders and evaluation metrics.
    """

    # dataset locations (overridable on the command line)
    train_annotations = 'Karusel_dataset_v2_88/annotations/train.json'
    val_annotations = 'Karusel_dataset_v2_88/annotations/val.json'
    eval_annotations = val_annotations
    train_image_dir = 'Karusel_dataset_v2_88/images/train/'
    val_image_dir = 'Karusel_dataset_v2_88/images/val/'
    eval_image_dir = val_image_dir

    # training configuration defaults (overridden by configure())
    n_images = None
    square_edge = 513
    extended_scale = False
    orientation_invariant = 0.0
    blur = 0.0
    augmentation = True
    rescale_images = 1.0
    upsample_stride = 1
    min_kp_anns = 1
    b_min = 1  # 1 pixel

    eval_annotation_filter = True
    eval_long_edge = 0  # set to zero to deactivate rescaling
    eval_orientation_invariant = 0.0
    eval_extended_scale = False

    # Robustness fix: `weights` was only ever assigned inside configure(), so
    # instantiating the module without CLI configuration raised an
    # AttributeError in __init__. Provide a default of None (no weighting).
    weights = None

    def __init__(self):
        super().__init__()
        if self.weights is not None:
            # A bone's CAF training weight is the larger of its two endpoint
            # keypoint weights, renormalized so the mean weight stays 1.
            caf_weights = []
            for bone in self.CAR_SKELETON:
                caf_weights.append(max(self.weights[bone[0] - 1],
                                       self.weights[bone[1] - 1]))
            w_np = np.array(caf_weights)
            caf_weights = list(w_np / np.sum(w_np) * len(caf_weights))
        else:
            caf_weights = None
        cif = headmeta.Cif('cif', 'apollo',
                           keypoints=self.CAR_KEYPOINTS,
                           sigmas=self.CAR_SIGMAS,
                           pose=self.CAR_POSE,
                           draw_skeleton=self.CAR_SKELETON,
                           score_weights=self.CAR_SCORE_WEIGHTS,
                           training_weights=self.weights)
        caf = headmeta.Caf('caf', 'apollo',
                           keypoints=self.CAR_KEYPOINTS,
                           sigmas=self.CAR_SIGMAS,
                           pose=self.CAR_POSE,
                           skeleton=self.CAR_SKELETON,
                           training_weights=caf_weights)

        cif.upsample_stride = self.upsample_stride
        caf.upsample_stride = self.upsample_stride
        self.head_metas = [cif, caf]

    @classmethod
    def cli(cls, parser: argparse.ArgumentParser):
        """Register the --karusel-* command line options on ``parser``."""
        group = parser.add_argument_group('data module Apollo')

        group.add_argument('--karusel-train-annotations',
                           default=cls.train_annotations)
        group.add_argument('--karusel-val-annotations',
                           default=cls.val_annotations)
        group.add_argument('--karusel-train-image-dir',
                           default=cls.train_image_dir)
        group.add_argument('--karusel-val-image-dir',
                           default=cls.val_image_dir)

        group.add_argument('--karusel-square-edge',
                           default=cls.square_edge, type=int,
                           help='square edge of input images')
        assert not cls.extended_scale
        group.add_argument('--karusel-extended-scale',
                           default=False, action='store_true',
                           help='augment with an extended scale range')
        group.add_argument('--karusel-orientation-invariant',
                           default=cls.orientation_invariant, type=float,
                           help='augment with random orientations')
        group.add_argument('--karusel-blur',
                           default=cls.blur, type=float,
                           help='augment with blur')
        assert cls.augmentation
        group.add_argument('--karusel-no-augmentation',
                           dest='karusel_augmentation',
                           default=True, action='store_false',
                           help='do not apply data augmentation')
        group.add_argument('--karusel-rescale-images',
                           default=cls.rescale_images, type=float,
                           help='overall rescale factor for images')
        group.add_argument('--karusel-upsample',
                           default=cls.upsample_stride, type=int,
                           help='head upsample stride')
        group.add_argument('--karusel-min-kp-anns',
                           default=cls.min_kp_anns, type=int,
                           help='filter images with fewer keypoint annotations')
        group.add_argument('--karusel-bmin',
                           default=cls.b_min, type=int,
                           help='b minimum in pixels')
        group.add_argument('--karusel-apply-local-centrality-weights',
                           dest='karusel_apply_local_centrality',
                           default=False, action='store_true',
                           help='Weigh the CIF and CAF head during training.')

        # evaluation
        assert cls.eval_annotation_filter
        group.add_argument('--karusel-no-eval-annotation-filter',
                           dest='karusel_eval_annotation_filter',
                           default=True, action='store_false')
        group.add_argument('--karusel-eval-long-edge', default=cls.eval_long_edge, type=int,
                           help='set to zero to deactivate rescaling')
        assert not cls.eval_extended_scale
        group.add_argument('--karusel-eval-extended-scale', default=False, action='store_true')
        group.add_argument('--karusel-eval-orientation-invariant',
                           default=cls.eval_orientation_invariant, type=float)
        # NOTE(review): the help text below was copied from the Apollo plugin
        # and still talks about 24/66 kps; the flag actually selects the
        # 88-kp ("24") constant set for this dataset.
        group.add_argument('--karusel-use-88-kps', default=False, action='store_true',
                           help=('The ApolloCar3D dataset can '
                                 'be trained with 24 or 66 kps. If you want to train a model '
                                 'with 24 kps activate this flag. Change the annotations '
                                 'path to the json files with 24 kps.'))

    @classmethod
    def configure(cls, args: argparse.Namespace):
        """Copy parsed CLI options onto the class."""
        # extract global information
        cls.debug = args.debug
        cls.pin_memory = args.pin_memory

        # Apollo specific
        cls.train_annotations = args.karusel_train_annotations
        cls.val_annotations = args.karusel_val_annotations
        cls.eval_annotations = cls.val_annotations
        cls.train_image_dir = args.karusel_train_image_dir
        cls.val_image_dir = args.karusel_val_image_dir
        cls.eval_image_dir = cls.val_image_dir

        cls.square_edge = args.karusel_square_edge
        cls.extended_scale = args.karusel_extended_scale
        cls.orientation_invariant = args.karusel_orientation_invariant
        cls.blur = args.karusel_blur
        cls.augmentation = args.karusel_augmentation  # loaded by the dest name
        cls.rescale_images = args.karusel_rescale_images
        cls.upsample_stride = args.karusel_upsample
        cls.min_kp_anns = args.karusel_min_kp_anns
        cls.b_min = args.karusel_bmin
        if args.karusel_use_88_kps:
            (cls.CAR_KEYPOINTS, cls.CAR_SKELETON, cls.HFLIP, cls.CAR_SIGMAS, cls.CAR_POSE,
             cls.CAR_CATEGORIES, cls.CAR_SCORE_WEIGHTS) = get_constants(24)
        else:
            (cls.CAR_KEYPOINTS, cls.CAR_SKELETON, cls.HFLIP, cls.CAR_SIGMAS, cls.CAR_POSE,
             cls.CAR_CATEGORIES, cls.CAR_SCORE_WEIGHTS) = get_constants(66)
        # evaluation
        cls.eval_annotation_filter = args.karusel_eval_annotation_filter
        cls.eval_long_edge = args.karusel_eval_long_edge
        cls.eval_orientation_invariant = args.karusel_eval_orientation_invariant
        cls.eval_extended_scale = args.karusel_eval_extended_scale
        if args.karusel_apply_local_centrality:
            # Bug fix: the original read args.karusel_use_24_kps, an attribute
            # that is never registered (the flag is --karusel-use-88-kps), so
            # this branch always crashed with AttributeError.
            if args.karusel_use_88_kps:
                raise Exception("Applying local centrality weights only works with 66 kps.")
            cls.weights = training_weights_local_centrality
        else:
            cls.weights = None

    def _preprocess(self):
        """Build the preprocessing + encoding pipeline for training/validation."""
        encoders = (encoder.Cif(self.head_metas[0], bmin=self.b_min),
                    encoder.Caf(self.head_metas[1], bmin=self.b_min))

        if not self.augmentation:
            return transforms.Compose([
                transforms.NormalizeAnnotations(),
                transforms.RescaleAbsolute(self.square_edge),
                transforms.CenterPad(self.square_edge),
                transforms.EVAL_TRANSFORM,
                transforms.Encoders(encoders),
            ])

        if self.extended_scale:
            rescale_t = transforms.RescaleRelative(
                scale_range=(0.2 * self.rescale_images,
                             2.0 * self.rescale_images),
                power_law=True, stretch_range=(0.75, 1.33))
        else:
            rescale_t = transforms.RescaleRelative(
                scale_range=(0.33 * self.rescale_images,
                             1.33 * self.rescale_images),
                power_law=True, stretch_range=(0.75, 1.33))

        return transforms.Compose([
            transforms.NormalizeAnnotations(),
            # transforms.AnnotationJitter(),
            transforms.RandomApply(transforms.HFlip(self.CAR_KEYPOINTS, self.HFLIP), 0.5),
            rescale_t,
            transforms.RandomApply(transforms.Blur(), self.blur),
            transforms.RandomChoice(
                [transforms.RotateBy90(),
                 transforms.RotateUniform(30.0)],
                [self.orientation_invariant, 0.2],
            ),
            transforms.Crop(self.square_edge, use_area_of_interest=True),
            transforms.CenterPad(self.square_edge),
            transforms.MinSize(min_side=32.0),
            transforms.TRAIN_TRANSFORM,
            transforms.Encoders(encoders),
        ])

    def train_loader(self):
        """DataLoader over the training split (shuffled unless debugging)."""
        train_data = CocoLoader(
            image_dir=self.train_image_dir,
            ann_file=self.train_annotations,
            preprocess=self._preprocess(),
            annotation_filter=True,
            min_kp_anns=self.min_kp_anns,
            category_ids=[1],
        )
        return torch.utils.data.DataLoader(
            train_data, batch_size=self.batch_size, shuffle=not self.debug,
            pin_memory=self.pin_memory, num_workers=self.loader_workers, drop_last=True,
            collate_fn=collate_images_targets_meta)

    def val_loader(self):
        """DataLoader over the validation split (never shuffled)."""
        val_data = CocoLoader(
            image_dir=self.val_image_dir,
            ann_file=self.val_annotations,
            preprocess=self._preprocess(),
            annotation_filter=True,
            min_kp_anns=self.min_kp_anns,
            category_ids=[1],
        )
        return torch.utils.data.DataLoader(
            val_data, batch_size=self.batch_size, shuffle=False,
            pin_memory=self.pin_memory, num_workers=self.loader_workers, drop_last=True,
            collate_fn=collate_images_targets_meta)

    @classmethod
    def common_eval_preprocess(cls):
        """Shared eval-time transforms: rescale, pad and orientation handling."""
        rescale_t = None
        if cls.eval_extended_scale:
            assert cls.eval_long_edge
            rescale_t = [
                transforms.DeterministicEqualChoice([
                    transforms.RescaleAbsolute(cls.eval_long_edge),
                    transforms.RescaleAbsolute((cls.eval_long_edge - 1) // 2 + 1),
                ], salt=1)
            ]
        elif cls.eval_long_edge:
            rescale_t = transforms.RescaleAbsolute(cls.eval_long_edge)

        if cls.batch_size == 1:
            padding_t = transforms.CenterPadTight(16)
        else:
            # batched evaluation needs a fixed image size
            assert cls.eval_long_edge
            padding_t = transforms.CenterPad(cls.eval_long_edge)

        orientation_t = None
        if cls.eval_orientation_invariant:
            orientation_t = transforms.DeterministicEqualChoice([
                None,
                transforms.RotateBy90(fixed_angle=90),
                transforms.RotateBy90(fixed_angle=180),
                transforms.RotateBy90(fixed_angle=270),
            ], salt=3)

        return [
            transforms.NormalizeAnnotations(),
            rescale_t,
            padding_t,
            orientation_t,
        ]

    def _eval_preprocess(self):
        """Eval pipeline: common transforms plus conversion to annotations."""
        return transforms.Compose([
            *self.common_eval_preprocess(),
            transforms.ToAnnotations([
                transforms.ToKpAnnotations(
                    self.CAR_CATEGORIES,
                    keypoints_by_category={1: self.head_metas[0].keypoints},
                    skeleton_by_category={1: self.head_metas[1].skeleton},
                ),
                transforms.ToCrowdAnnotations(self.CAR_CATEGORIES),
            ]),
            transforms.EVAL_TRANSFORM,
        ])

    def eval_loader(self):
        """DataLoader over the evaluation split."""
        eval_data = CocoLoader(
            image_dir=self.eval_image_dir,
            ann_file=self.eval_annotations,
            preprocess=self._eval_preprocess(),
            annotation_filter=self.eval_annotation_filter,
            min_kp_anns=self.min_kp_anns if self.eval_annotation_filter else 0,
            category_ids=[1] if self.eval_annotation_filter else [],
        )
        return torch.utils.data.DataLoader(
            eval_data, batch_size=self.batch_size, shuffle=False,
            pin_memory=self.pin_memory, num_workers=self.loader_workers, drop_last=False,
            collate_fn=collate_images_anns_meta)

    # TODO: make sure that 24kp flag is activated when evaluating a 24kp model
    def metrics(self):
        """COCO keypoint metric plus the custom mean-pixel-error metric.

        NOTE(review): COCO is None when pycocotools is not installed (see the
        guarded import at module top) — this method would then fail.
        """
        return [metric.Coco(
            COCO(self.eval_annotations),
            max_per_image=20,
            category_ids=[1],
            iou_type='keypoints',
            keypoint_oks_sigmas=self.CAR_SIGMAS
        ), MeanPixelError()]
|
||||
131
Karussell/Training/openpifpaf_karusell/metrics.py
Normal file
131
Karussell/Training/openpifpaf_karusell/metrics.py
Normal file
@@ -0,0 +1,131 @@
|
||||
import logging
|
||||
|
||||
import numpy as np
|
||||
|
||||
from openpifpaf.metric.base import Base
|
||||
from openpifpaf.annotation import Annotation
|
||||
|
||||
try:
|
||||
import scipy
|
||||
except ImportError:
|
||||
scipy = None
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MeanPixelError(Base):
    """
    Calculate mean pixel error and detection rate for a given image
    and category in an "all-vs-all setting"
    """

    # NOTE(review): these are *class-level* mutable lists, so all instances
    # share the same accumulators. That is fine for the single-instance eval
    # loop but surprising if several metric objects are created.
    predictions = []
    image_ids = []
    errors = []  # per-keypoint pixel errors
    detections = []  # 1/0 per evaluated keypoint
    errors_scaled = []  # pixel errors scaled to the CPM reference crop
    detections_scaled = []  # 1/0 per keypoint, on the scaled error
    px_ref = 368  # CPM crop size in pixels

    def accumulate(self, predictions, image_meta, *, ground_truth=None):
        """Accumulate per-keypoint errors and detections for one image.

        For every visible ground-truth keypoint (visibility > 1), the closest
        predicted keypoint over all predictions ("all-vs-all") is matched and
        its pixel distance recorded. A keypoint counts as detected when the
        error is below 10 pixels. Scaled variants map the instance bbox to a
        px_ref x px_ref crop first.
        """
        errors = []
        detections = []
        errors_scaled = []
        detections_scaled = []

        # Filter ground-truth
        for annotation in ground_truth:
            if not isinstance(annotation, Annotation):
                continue
            indices_gt = np.nonzero(annotation.data[:, 2] > 1.0)
            # Skip instances with 3 or fewer visible keypoints.
            if indices_gt[0].size <= 3:
                continue
            gts = annotation.data[indices_gt, 0:2].squeeze()
            width = float(annotation.fixed_bbox[2])
            height = float(annotation.fixed_bbox[3])
            # Factors that map the instance bbox onto the CPM reference crop.
            scale = np.array([self.px_ref / width, self.px_ref / height]).reshape(1, 2)

            # Evaluate each keypoint
            for idx, gt in zip(indices_gt[0], gts):
                preds = np.array([p.data[idx] for p in predictions]).reshape(-1, 3)[:, 0:2]
                if preds.size <= 0:
                    continue
                i = np.argmin(np.linalg.norm(preds - gt, axis=1))
                dist = preds[i:i + 1] - gt
                dist_scaled = dist * scale
                d = float(np.linalg.norm(dist, axis=1))
                d_scaled = float(np.linalg.norm(dist_scaled, axis=1))

                # Prediction correct if error less than 10 pixels
                if d < 10:
                    errors.append(d)
                    detections.append(1)
                else:
                    detections.append(0)
                if d_scaled < 10:
                    # Bug fix: the original appended the *unscaled* error `d`
                    # here, which corrupted the scaled mean pixel error.
                    errors_scaled.append(d_scaled)
                    detections_scaled.append(1)
                else:
                    detections_scaled.append(0)

        # Stats for a single image
        mpe = average(errors)
        mpe_scaled = average(errors_scaled)
        det_rate = 100 * average(detections)
        det_rate_scaled = 100 * average(detections_scaled)
        LOG.info('Mean Pixel Error (scaled): %s (%s) Det. Rate (scaled): %s (%s)',
                 str(mpe)[:4], str(mpe_scaled)[:4], str(det_rate)[:4], str(det_rate_scaled)[:4])

        # Accumulate stats
        self.errors.extend(errors)
        self.detections.extend(detections)
        self.errors_scaled.extend(errors_scaled)
        self.detections_scaled.extend(detections_scaled)

    def write_predictions(self, filename, *, additional_data=None):
        """Writing raw predictions is not supported for this metric."""
        raise NotImplementedError

    def stats(self):
        """Return aggregated stats in the dict format expected by openpifpaf."""
        mpe = average(self.errors)
        mpe_scaled = average(self.errors_scaled)
        det_rate = 100 * average(self.detections)
        det_rate_scaled = 100 * average(self.detections_scaled)
        LOG.info('Final Results: \nMean Pixel Error [scaled] : %f [%f] '
                 '\nDetection Rate [scaled]: %f [%f]',
                 mpe, mpe_scaled, det_rate, det_rate_scaled)
        data = {
            'stats': [mpe, mpe_scaled, det_rate, det_rate_scaled],
            'text_labels': ['Mean Pixel Error',
                            'Mean Pixel Error Scaled',
                            'Detection Rate [%]',
                            'Detection Rate Scaled[%]'],
        }
        return data
|
||||
|
||||
|
||||
def hungarian_matching(gts, predictions, thresh=0.5):
    """Match ground-truth instances to predictions with the Hungarian algorithm.

    Args:
        gts: iterable of ``(coords, visibilities)`` pairs, where ``coords`` is
            the (n_visible, 2) array of visible keypoint locations and
            ``visibilities`` the per-keypoint visibility array of the instance.
        predictions: objects with a ``.data`` array of shape (n_kps, 3)
            holding x, y, confidence per keypoint.
        thresh: confidence below which a predicted keypoint is discarded
            (its coordinates are pushed far away so it cannot match).

    Returns:
        ``(rows, cols, cost)`` — the assignment indices from
        ``scipy.optimize.linear_sum_assignment`` and the full cost matrix.
    """
    cost = np.zeros((len(gts), len(predictions)))

    for i, (dg, vg) in enumerate(gts):
        for j, pred in enumerate(predictions):
            p = np.array(pred.data)
            dp = p[:, 0:2][vg > 1.0]
            vp = p[:, 2][vg > 1.0]

            # Push low-confidence predictions far away so their (clipped)
            # distance is maximal.  (Bug fix: the original repeated this
            # statement twice; the duplicate was dead code.)
            dp[vp < thresh] = -100

            # measure the per-keypoint distance, capped at 10 pixels
            distances = np.clip(np.linalg.norm(dp - dg, axis=1), 0, 10)
            cost[i, j] = float(np.mean(distances))

    assert np.max(cost) < 11
    row, cols = scipy.optimize.linear_sum_assignment(cost)
    return row, cols, cost
|
||||
|
||||
|
||||
def average(my_list, *, empty_value=0.0):
    """Return the arithmetic mean of ``my_list``, or ``empty_value`` if empty."""
    if my_list:
        return sum(my_list) / float(len(my_list))
    return empty_value
|
||||
Reference in New Issue
Block a user