Convert JSON

1. Download JSON

  • Option 1: Download the JSON file directly from the UI using the Export tab and drag the JSON file into your working directory
  • Option 2: Use the mdai library to download the JSON

Create Client

import mdai
# Get variables from project info tab and user settings
DOMAIN = 'public.md.ai'
YOUR_PERSONAL_TOKEN = 'a1s2d3f4g4h5h59797kllh8vk'
PROJECT_ID = 'MwBe19Br' #project info
mdai_client = mdai.Client(domain=DOMAIN, access_token=YOUR_PERSONAL_TOKEN)

Example output

Successfully authenticated to public.md.ai.

Download annotations

# Download only the annotation data
p = mdai_client.project(PROJECT_ID, path='.', annotations_only=True)

Example output

Using working directory for data.
Preparing annotations export for project MwBe19Br...
Success: annotations data for project MwBe19Br ready.
Downloading file: mdai_public_project_MwBe19Br_annotations_labelgroup_all_2020-09-23-214038.json
No project created. Downloaded annotations only.

2. JSON to Dataframe

Copy the downloaded file name from the output above, or take it from your downloaded JSON file.

#Replace with your filename
JSON = 'mdai_public_project_MwBe19Br_annotations_labelgroup_all_2020-09-23-214038.json'
results = mdai.common_utils.json_to_dataframe(JSON)
#Annotations dataframe
annots_df = results['annotations']
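
Depending on the mdai version, the returned dictionary may also carry additional dataframes (for example studies and labels) alongside the annotations; a quick sanity check confirms the export loaded as expected:

#Quick sanity check on the export (available keys may vary by mdai version)
print(results.keys())
print(annots_df.shape)
print(annots_df.annotationMode.value_counts())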

3. Conversions

Bounding box

Extract box data dictionary items

#Simplify table
columns_brief = ['id', 'StudyInstanceUID', 'SeriesInstanceUID', 'SOPInstanceUID', 'labelName', 'data', 'annotationMode']
annots_df = annots_df[columns_brief]

#Box annotations
boxes = annots_df[annots_df.annotationMode == 'bbox']

#Extract box data
import json

def extract_box_data(df):
    j = df.copy()
    j = j[(j.annotationMode == 'bbox') & (~j.data.isnull())]
    #The 'data' column may be stored as a JSON-like string; parse it into a dict if needed
    j['data'] = j['data'].apply(
        lambda x: json.loads(x.replace("'", '"')) if isinstance(x, str) else x
    )

    j['x'] = [d['x'] for _, d in j.data.items()]
    j['y'] = [d['y'] for _, d in j.data.items()]
    j['w'] = [d['width'] for _, d in j.data.items()]
    j['h'] = [d['height'] for _, d in j.data.items()]
    j = j.drop('data', axis=1)
    return j

boxes = extract_box_data(boxes)
boxes.head()

Example output

    id  StudyInstanceUID    SeriesInstanceUID   SOPInstanceUID  labelName   annotationMode  x   y   w   h
0   A_J07mWn    1.3.6.1.4.1.14519.5.2.1.1079.4008.312038908377...   1.3.6.1.4.1.14519.5.2.1.1079.4008.228089024512...   1.3.6.1.4.1.14519.5.2.1.1079.4008.515564548072...   Stomach bbox    220.822845  124.504723  121.008514  82.380470
1   A_qxVdNk    1.3.6.1.4.1.14519.5.2.1.1079.4008.312038908377...   1.3.6.1.4.1.14519.5.2.1.1079.4008.228089024512...   1.3.6.1.4.1.14519.5.2.1.1079.4008.306261432947...   Stomach bbox    183.308075  148.138794  99.586609   68.920502
2   A_49KLWk    1.3.6.1.4.1.14519.5.2.1.1079.4008.312038908377...   1.3.6.1.4.1.14519.5.2.1.1079.4008.228089024512...   1.3.6.1.4.1.14519.5.2.1.1079.4008.132057312174...   Stomach bbox    251.314133  125.590210  84.764328   103.895081

Derive additional box data

Now let's get the box area, center, and bottom-right corner.

boxes['area'] = boxes.w * boxes.h
boxes['center_x'] = boxes.x + boxes.w/2
boxes['center_y'] = boxes.y + boxes.h/2
boxes['bottom_x'] = boxes.x + boxes.w
boxes['bottom_y'] = boxes.y + boxes.h
#Convert values to integers for simplicity
boxes[boxes.columns[6:]] = boxes[boxes.columns[6:]].astype('int')
boxes.head()

Example output

    id  StudyInstanceUID    SeriesInstanceUID   SOPInstanceUID  labelName   annotationMode  x   y   w   h   area    center_x    center_y    bottom_x    bottom_y
0   A_J07mWn    1.3.6.1.4.1.14519.5.2.1.1079.4008.312038908377...   1.3.6.1.4.1.14519.5.2.1.1079.4008.228089024512...   1.3.6.1.4.1.14519.5.2.1.1079.4008.515564548072...   Stomach bbox    220 124 121 82  27391   280 165 341 206
1   A_qxVdNk    1.3.6.1.4.1.14519.5.2.1.1079.4008.312038908377...   1.3.6.1.4.1.14519.5.2.1.1079.4008.228089024512...   1.3.6.1.4.1.14519.5.2.1.1079.4008.306261432947...   Stomach bbox    183 148 99  68  27109   232 182 282 217
2   A_49KLWk    1.3.6.1.4.1.14519.5.2.1.1079.4008.312038908377...   1.3.6.1.4.1.14519.5.2.1.1079.4008.228089024512...   1.3.6.1.4.1.14519.5.2.1.1079.4008.132057312174...   Stomach bbox    251 125 84  103 31523   293 177 335 229

Formatting examples

Detectron2

Detectron2 expects bbox coordinates in the format [x_upper_left, y_upper_left, x_lower_right, y_lower_right]. Be mindful of the need to scale the annotation data if the image is scaled.

scale = 1.
boxes['detectron_bbox'] =  [[row.x * scale, row.y * scale, row.bottom_x * scale, row.bottom_y * scale] for _,row in boxes.iterrows()]
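
As a rough illustration, these [x1, y1, x2, y2] boxes can be packed into Detectron2-style dataset records; the file path, image size, and category id below are placeholders, not values from the export:

from detectron2.structures import BoxMode

#Sketch only: group boxes per image and build Detectron2-style records
records = []
for sop_uid, grp in boxes.groupby('SOPInstanceUID'):
    records.append({
        'file_name': f'{sop_uid}.png',   #placeholder path to the rendered image
        'image_id': sop_uid,
        'height': 512,                   #replace with the actual image height
        'width': 512,                    #replace with the actual image width
        'annotations': [
            {
                'bbox': row.detectron_bbox,
                'bbox_mode': BoxMode.XYXY_ABS,
                'category_id': 0,        #map labelName to an integer class id
            }
            for _, row in grp.iterrows()
        ],
    })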

Fastai

Fastai expects bbox coordinates in the format (y_upper_left, x_upper_left, y_lower_right, x_lower_right), with the origin in the upper left corner of the image. Remember to scale the annotations if the images are scaled.

scale = 1.
boxes['fastai_bbox'] =  [[row.y * scale, row.x * scale, row.bottom_y * scale, row.bottom_x * scale] for _,row in boxes.iterrows()]
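
Fastai (like most training loops) ultimately wants the boxes grouped per image rather than one row per annotation; a minimal pandas sketch, keyed on SOPInstanceUID:

#Collect per-image box lists and label lists, keyed by SOPInstanceUID
per_image = (
    boxes.groupby('SOPInstanceUID')
         .apply(lambda g: (list(g.fastai_bbox), list(g.labelName)))
         .to_dict()
)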

Freeform and polygon

Filter for freeform and polygon annotations

#Simplify table
columns_brief = ['id', 'StudyInstanceUID', 'SeriesInstanceUID', 'SOPInstanceUID', 'labelName', 'data', 'annotationMode']
annots_df = annots_df[columns_brief]

#Shape annotations
shapes = annots_df[annots_df.annotationMode.isin(['freeform', 'polygon'])].copy()  #copy avoids SettingWithCopyWarning when adding columns below

Get the bounding box of the vertices

#Extract box data from vertices
import numpy as np

def vertices_to_boxes(data):
    vertices = data['vertices']
    px=[v[0] for v in vertices]
    py=[v[1] for v in vertices]
    x = int(np.min(px))
    y = int(np.min(py))
    x2 = int(np.max(px))
    y2 = int(np.max(py))
    w = x2 - x
    h = y2 - y
    return (x, y, w, h, x2, y2)

shapes['x'],shapes['y'],shapes['w'],shapes['h'],shapes['bottom_x'],shapes['bottom_y'] = zip(*shapes['data'].map(vertices_to_boxes))
shapes.head()

Example output

id  StudyInstanceUID    SeriesInstanceUID   SOPInstanceUID  labelName   data    annotationMode  x   y   w   h   bottom_x    bottom_y
0   A_JddPQE    1.3.46.670589.11.20182.5.0.8336.20130724083604...   1.3.46.670589.11.20182.5.0.7188.20130724085044...   1.3.6.1.4.1.9590.100.1.2.260616927513309992437...   Liver   {'vertices': [[64, 130], [64, 129], [64, 128],...   freeform    54  115 10  20  64  135
1   A_qG28ZY    1.3.46.670589.11.20182.5.0.8336.20130724083604...   1.3.46.670589.11.20182.5.0.7188.20130724085044...   1.3.6.1.4.1.9590.100.1.2.388757691711551811514...   Liver   {'vertices': [[66, 129], [66, 128], [66, 127],...   freeform    53  112 13  27  66  139

Longest diameter

Get the longest diameter of a shape in mm. Pixel spacing is obtained with the pydicom library from the PixelSpacing tag.

import cv2

def longest_diameter(data, pixel_spacing):
    try:
        row_spacing, col_spacing = pixel_spacing
    except (TypeError, ValueError):
        return -1

    #cv2.convexHull requires int32 or float32 points
    vertices = np.array(data['vertices'], dtype=np.float32)
    max_points = most_distant_points(vertices)
    x1, y1 = max_points[0]
    x2, y2 = max_points[1]

    dx = col_spacing * (x2 - x1)
    dy = row_spacing * (y2 - y1)
    distance = np.sqrt(dx ** 2 + dy ** 2)
    #returns longest diameter in mm
    return round(distance, 2)

def most_distant_points(points):
    hull = cv2.convexHull(points, returnPoints=True)

    max_points = [points[0], points[1]]
    max_distance = 0
    #Compare every pair of hull points and keep the farthest pair
    for i in range(len(hull)):
        for j in range(i + 1, len(hull)):
            p1 = hull[i][0]
            p2 = hull[j][0]
            distance = calc_distance(p1, p2)
            if distance > max_distance:
                max_points = [p1, p2]
                max_distance = distance
    return max_points

def calc_distance(p1, p2):
    return np.sqrt((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2)

Example

import pydicom as py

#Use the 'data' dict from one annotation row and its matching DICOM file
ds = py.dcmread('000.dcm')
data = shapes.iloc[0].data
ld = longest_diameter(data, ds.PixelSpacing)
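
To compute a diameter for every freeform/polygon row, the pixel spacing of the matching DICOM is needed per image; the dicom_path_for helper below is a hypothetical stand-in for however your files are organized on disk:

#Hypothetical helper: map a SOPInstanceUID to its local DICOM file path
def dicom_path_for(sop_uid):
    return f'./dicoms/{sop_uid}.dcm'

def row_longest_diameter(row):
    ds = py.dcmread(dicom_path_for(row.SOPInstanceUID))
    return longest_diameter(row.data, ds.PixelSpacing)

shapes['longest_diameter_mm'] = shapes.apply(row_longest_diameter, axis=1)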

Mask

Convert an MD.ai annotation to a mask

Function to load a single mask instance from one row of annotation data. This turns one box, freeform, polygon, line, or location annotation into a binary mask sized to the corresponding image.

def load_mask_instance(row):
    """Load instance masks for the given annotation row. Masks can be different types,
    mask is a binary true/false map of the same size as the image.
    """

    mask = np.zeros((row.height, row.width), dtype=np.uint8)

    annotation_mode = row.annotationMode
    # print(annotation_mode)

    if annotation_mode == "bbox":
        # Bounding Box
        x = int(row["data"]["x"])
        y = int(row["data"]["y"])
        w = int(row["data"]["width"])
        h = int(row["data"]["height"])
        mask_instance = mask[:,:].copy()
        cv2.rectangle(mask_instance, (x, y), (x + w, y + h), 255, -1)
        mask[:,:] = mask_instance

    # FreeForm or Polygon
    elif annotation_mode == "freeform" or annotation_mode == "polygon":
        vertices = np.array(row["data"]["vertices"])
        vertices = vertices.reshape((-1, 2))
        mask_instance = mask[:,:].copy()
        cv2.fillPoly(mask_instance, np.int32([vertices]), (255, 255, 255))
        mask[:,:] = mask_instance

    # Line
    elif annotation_mode == "line":
        vertices = np.array(row["data"]["vertices"])
        vertices = vertices.reshape((-1, 2))
        mask_instance = mask[:,:].copy()
        cv2.polylines(mask_instance, np.int32([vertices]), False, (255, 255, 255), 12)
        mask[:,:] = mask_instance

    elif annotation_mode == "location":
        # Location (point)
        x = int(row["data"]["x"])
        y = int(row["data"]["y"])
        mask_instance = mask[:,:].copy()
        cv2.circle(mask_instance, (x, y), 7, (255, 255, 255), -1)
        mask[:,:] = mask_instance

    elif annotation_mode is None:
        print("Not a local instance")


    return mask.astype(bool)
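
A small usage sketch, assuming the unfiltered annotations dataframe from the export still carries the height, width, and data columns that the function reads (the column filtering above drops height and width):

import matplotlib.pyplot as plt

#Build and display the mask for the first freeform annotation
full_df = results['annotations']
row = full_df[full_df.annotationMode == 'freeform'].iloc[0]
mask = load_mask_instance(row)

plt.imshow(mask, cmap='gray')
plt.title(row.labelName)
plt.show()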