Convert JSON

1. Download JSON

  • Option 1: Download the JSON file directly from the UI using the Export tab and drag the JSON file into your working directory
  • Option 2: Use the mdai library to download the JSON

Create Client

import mdai
# Get variables from project info tab and user settings
DOMAIN = 'public.md.ai'
YOUR_PERSONAL_TOKEN = 'a1s2d3f4g4h5h59797kllh8vk'
PROJECT_ID = 'MwBe19Br' #project info
mdai_client = mdai.Client(domain=DOMAIN, access_token=YOUR_PERSONAL_TOKEN)

Example output

Successfully authenticated to public.md.ai.

Download annotations

# Download only the annotation data
p = mdai_client.project(PROJECT_ID, path='.', annotations_only=True)

Example output

Using working directory for data.
Preparing annotations export for project MwBe19Br...
Success: annotations data for project MwBe19Br ready.
Downloading file: mdai_public_project_MwBe19Br_annotations_labelgroup_all_2020-09-23-214038.json
No project created. Downloaded annotations only.

2. JSON to Dataframe

Copy the downloaded file name from the output above, or take it from your downloaded JSON file.

#Replace with your filename
JSON = 'mdai_public_project_MwBe19Br_annotations_labelgroup_all_2020-09-23-214038.json'
results = mdai.common_utils.json_to_dataframe(JSON)
#Annotations dataframe
annots_df = results['annotations']
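
Depending on the mdai version, the returned dictionary may also carry additional dataframes (for example studies and labels) alongside the annotations; a quick sanity check confirms the export loaded as expected:

#Quick sanity check on the export (available keys may vary by mdai version)
print(results.keys())
print(annots_df.shape)
print(annots_df.annotationMode.value_counts())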

3. Conversions

Bounding box

Extract box data dictionary items

#Simplify table
columns_brief = ['id', 'StudyInstanceUID', 'SeriesInstanceUID', 'SOPInstanceUID', 'labelName', 'data', 'annotationMode']
annots_df = annots_df[columns_brief]

#Box annotations
boxes = annots_df[annots_df.annotationMode == 'bbox']

#Extract box data
import json

def extract_box_data(df):
    j = df.copy()
    j = j[(j.annotationMode == 'bbox') & (~j.data.isnull())]
    #The 'data' column may be stored as a JSON-like string; parse it into a dict if needed
    j['data'] = j['data'].apply(
        lambda x: json.loads(x.replace("'", '"')) if isinstance(x, str) else x
    )

    j['x'] = [d['x'] for _, d in j.data.items()]
    j['y'] = [d['y'] for _, d in j.data.items()]
    j['w'] = [d['width'] for _, d in j.data.items()]
    j['h'] = [d['height'] for _, d in j.data.items()]
    j = j.drop('data', axis=1)
    return j

boxes = extract_box_data(boxes)
boxes.head()

Example output

    id  StudyInstanceUID    SeriesInstanceUID   SOPInstanceUID  labelName   annotationMode  x   y   w   h
0   A_J07mWn    1.3.6.1.4.1.14519.5.2.1.1079.4008.312038908377...   1.3.6.1.4.1.14519.5.2.1.1079.4008.228089024512...   1.3.6.1.4.1.14519.5.2.1.1079.4008.515564548072...   Stomach bbox    220.822845  124.504723  121.008514  82.380470
1   A_qxVdNk    1.3.6.1.4.1.14519.5.2.1.1079.4008.312038908377...   1.3.6.1.4.1.14519.5.2.1.1079.4008.228089024512...   1.3.6.1.4.1.14519.5.2.1.1079.4008.306261432947...   Stomach bbox    183.308075  148.138794  99.586609   68.920502
2   A_49KLWk    1.3.6.1.4.1.14519.5.2.1.1079.4008.312038908377...   1.3.6.1.4.1.14519.5.2.1.1079.4008.228089024512...   1.3.6.1.4.1.14519.5.2.1.1079.4008.132057312174...   Stomach bbox    251.314133  125.590210  84.764328   103.895081

Derive additional box data

Now let's get the box area, center, and bottom-right corner.

boxes['area'] = boxes.w * boxes.h
boxes['center_x'] = boxes.x + boxes.w/2
boxes['center_y'] = boxes.y + boxes.h/2
boxes['bottom_x'] = boxes.x + boxes.w
boxes['bottom_y'] = boxes.y + boxes.h
#Convert values to integers for simplicity
boxes[boxes.columns[6:]] = boxes[boxes.columns[6:]].astype('int')
boxes.head()

Example output

    id  StudyInstanceUID    SeriesInstanceUID   SOPInstanceUID  labelName   annotationMode  x   y   w   h   area    center_x    center_y    bottom_x    bottom_y
0   A_J07mWn    1.3.6.1.4.1.14519.5.2.1.1079.4008.312038908377...   1.3.6.1.4.1.14519.5.2.1.1079.4008.228089024512...   1.3.6.1.4.1.14519.5.2.1.1079.4008.515564548072...   Stomach bbox    220 124 121 82  27391   280 165 341 206
1   A_qxVdNk    1.3.6.1.4.1.14519.5.2.1.1079.4008.312038908377...   1.3.6.1.4.1.14519.5.2.1.1079.4008.228089024512...   1.3.6.1.4.1.14519.5.2.1.1079.4008.306261432947...   Stomach bbox    183 148 99  68  27109   232 182 282 217
2   A_49KLWk    1.3.6.1.4.1.14519.5.2.1.1079.4008.312038908377...   1.3.6.1.4.1.14519.5.2.1.1079.4008.228089024512...   1.3.6.1.4.1.14519.5.2.1.1079.4008.132057312174...   Stomach bbox    251 125 84  103 31523   293 177 335 229

Formatting examples

Detectron2

Detectron2 expects bbox coordinates in the format [x_upper_left, y_upper_left, x_lower_right, y_lower_right]. Be mindful of the need to scale the annotation data if the image is scaled.

scale = 1.
boxes['detectron_bbox'] =  [[row.x * scale, row.y * scale, row.bottom_x * scale, row.bottom_y * scale] for _,row in boxes.iterrows()]
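
As a rough illustration, these [x1, y1, x2, y2] boxes can be packed into Detectron2-style dataset records; the file path, image size, and category id below are placeholders, not values from the export:

from detectron2.structures import BoxMode

#Sketch only: group boxes per image and build Detectron2-style records
records = []
for sop_uid, grp in boxes.groupby('SOPInstanceUID'):
    records.append({
        'file_name': f'{sop_uid}.png',   #placeholder path to the rendered image
        'image_id': sop_uid,
        'height': 512,                   #replace with the actual image height
        'width': 512,                    #replace with the actual image width
        'annotations': [
            {
                'bbox': row.detectron_bbox,
                'bbox_mode': BoxMode.XYXY_ABS,
                'category_id': 0,        #map labelName to an integer class id
            }
            for _, row in grp.iterrows()
        ],
    })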

Fastai

Fastai expects bbox coordinates in the format (y_upper_left, x_upper_left, y_lower_right, x_lower_right), with the origin in the upper left corner of the image. Remember to scale the annotations if the images are scaled.

scale = 1.
boxes['fastai_bbox'] =  [[row.y * scale, row.x * scale, row.bottom_y * scale, row.bottom_x * scale] for _,row in boxes.iterrows()]
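
Fastai (like most training loops) ultimately wants the boxes grouped per image rather than one row per annotation; a minimal pandas sketch, keyed on SOPInstanceUID:

#Collect per-image box lists and label lists, keyed by SOPInstanceUID
per_image = (
    boxes.groupby('SOPInstanceUID')
         .apply(lambda g: (list(g.fastai_bbox), list(g.labelName)))
         .to_dict()
)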

Freeform and polygon

Filter for freeform and polygon annotations

#Simplify table
columns_brief = ['id', 'StudyInstanceUID', 'SeriesInstanceUID', 'SOPInstanceUID', 'labelName', 'data', 'annotationMode']
annots_df = annots_df[columns_brief]

#Shape annotations
shapes = annots_df[annots_df.annotationMode.isin(['freeform', 'polygon'])].copy()  #copy avoids SettingWithCopyWarning when adding columns below

Get the bounding box of the vertices

#Extract box data from vertices
import numpy as np

def vertices_to_boxes(data):
    vertices = data['vertices']
    px=[v[0] for v in vertices]
    py=[v[1] for v in vertices]
    x = int(np.min(px))
    y = int(np.min(py))
    x2 = int(np.max(px))
    y2 = int(np.max(py))
    w = x2 - x
    h = y2 - y
    return (x, y, w, h, x2, y2)

shapes['x'],shapes['y'],shapes['w'],shapes['h'],shapes['bottom_x'],shapes['bottom_y'] = zip(*shapes['data'].map(vertices_to_boxes))
shapes.head()

Example output

id  StudyInstanceUID    SeriesInstanceUID   SOPInstanceUID  labelName   data    annotationMode  x   y   w   h   bottom_x    bottom_y
0   A_JddPQE    1.3.46.670589.11.20182.5.0.8336.20130724083604...   1.3.46.670589.11.20182.5.0.7188.20130724085044...   1.3.6.1.4.1.9590.100.1.2.260616927513309992437...   Liver   {'vertices': [[64, 130], [64, 129], [64, 128],...   freeform    54  115 10  20  64  135
1   A_qG28ZY    1.3.46.670589.11.20182.5.0.8336.20130724083604...   1.3.46.670589.11.20182.5.0.7188.20130724085044...   1.3.6.1.4.1.9590.100.1.2.388757691711551811514...   Liver   {'vertices': [[66, 129], [66, 128], [66, 127],...   freeform    53  112 13  27  66  139

Longest diameter

Get the longest diameter of a shape in mm. Pixel spacing is obtained with the pydicom library from the PixelSpacing tag.

import cv2

def longest_diameter(data, pixel_spacing):
    try:
        row_spacing, col_spacing = pixel_spacing
    except (TypeError, ValueError):
        return -1

    #cv2.convexHull requires int32 or float32 points
    vertices = np.array(data['vertices'], dtype=np.float32)
    max_points = most_distant_points(vertices)
    x1, y1 = max_points[0]
    x2, y2 = max_points[1]

    dx = col_spacing * (x2 - x1)
    dy = row_spacing * (y2 - y1)
    distance = np.sqrt(dx ** 2 + dy ** 2)
    #returns longest diameter in mm
    return round(distance, 2)

def most_distant_points(points):
    hull = cv2.convexHull(points, returnPoints=True)

    max_points = [points[0], points[1]]
    max_distance = 0
    #Compare every pair of hull points and keep the farthest pair
    for i in range(len(hull)):
        for j in range(i + 1, len(hull)):
            p1 = hull[i][0]
            p2 = hull[j][0]
            distance = calc_distance(p1, p2)
            if distance > max_distance:
                max_points = [p1, p2]
                max_distance = distance
    return max_points

def calc_distance(p1, p2):
    return np.sqrt((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2)

Example

import pydicom as py

#Use the 'data' dict from one annotation row and its matching DICOM file
ds = py.dcmread('000.dcm')
data = shapes.iloc[0].data
ld = longest_diameter(data, ds.PixelSpacing)
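
To compute a diameter for every freeform/polygon row, the pixel spacing of the matching DICOM is needed per image; the dicom_path_for helper below is a hypothetical stand-in for however your files are organized on disk:

#Hypothetical helper: map a SOPInstanceUID to its local DICOM file path
def dicom_path_for(sop_uid):
    return f'./dicoms/{sop_uid}.dcm'

def row_longest_diameter(row):
    ds = py.dcmread(dicom_path_for(row.SOPInstanceUID))
    return longest_diameter(row.data, ds.PixelSpacing)

shapes['longest_diameter_mm'] = shapes.apply(row_longest_diameter, axis=1)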

Mask

Convert an MD.ai annotation to a mask

Function to load a single mask instance from one row of annotation data. This turns one box, freeform, polygon, line, or location annotation into a binary mask sized to the corresponding image.

def load_mask_instance(row):
    """Load instance masks for the given annotation row. Masks can be different types,
    mask is a binary true/false map of the same size as the image.
    """

    mask = np.zeros((row.height, row.width), dtype=np.uint8)

    annotation_mode = row.annotationMode
    # print(annotation_mode)

    if annotation_mode == "bbox":
        # Bounding Box
        x = int(row["data"]["x"])
        y = int(row["data"]["y"])
        w = int(row["data"]["width"])
        h = int(row["data"]["height"])
        mask_instance = mask[:,:].copy()
        cv2.rectangle(mask_instance, (x, y), (x + w, y + h), 255, -1)
        mask[:,:] = mask_instance

    # FreeForm or Polygon
    elif annotation_mode == "freeform" or annotation_mode == "polygon":
        vertices = np.array(row["data"]["vertices"])
        vertices = vertices.reshape((-1, 2))
        mask_instance = mask[:,:].copy()
        cv2.fillPoly(mask_instance, np.int32([vertices]), (255, 255, 255))
        mask[:,:] = mask_instance

    # Line
    elif annotation_mode == "line":
        vertices = np.array(row["data"]["vertices"])
        vertices = vertices.reshape((-1, 2))
        mask_instance = mask[:,:].copy()
        cv2.polylines(mask_instance, np.int32([vertices]), False, (255, 255, 255), 12)
        mask[:,:] = mask_instance

    elif annotation_mode == "location":
        # Location (point)
        x = int(row["data"]["x"])
        y = int(row["data"]["y"])
        mask_instance = mask[:,:].copy()
        cv2.circle(mask_instance, (x, y), 7, (255, 255, 255), -1)
        mask[:,:] = mask_instance

    elif annotation_mode is None:
        print("Not a local instance")


    return mask.astype(bool)
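
A small usage sketch, assuming the unfiltered annotations dataframe from the export still carries the height, width, and data columns that the function reads (the column filtering above drops height and width):

import matplotlib.pyplot as plt

#Build and display the mask for the first freeform annotation
full_df = results['annotations']
row = full_df[full_df.annotationMode == 'freeform'].iloc[0]
mask = load_mask_instance(row)

plt.imshow(mask, cmap='gray')
plt.title(row.labelName)
plt.show()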