
Convert DICOM

Working with DICOM converted from JPEG/PNG

When JPEG or PNG images are uploaded to an MD.ai DICOM dataset, each image is wrapped as a DICOM file with the appropriate DICOM tags. Optionally, the images can be named using a filename schema that defines hierarchical relationships (exam/series/image). When the dataset is exported, the files are DICOM, not the original JPEG/PNG images. The original JPEG/PNG filename can be found in the ImageComments tag.
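
For example, the original filename can be read back from a converted file with pydicom (the path below is illustrative):

import pydicom

# Read a DICOM file that was converted from jpeg/png (illustrative path)
ds = pydicom.dcmread('converted_image.dcm')

# The original jpeg/png filename is stored in the ImageComments tag (0020,4000)
original_filename = ds.get('ImageComments', '')
print(original_filename)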

If you would like to use the converted DICOM directly, care must be taken to use the correct color space: JPEG uses YCbCr, which DICOM specifies in the PhotometricInterpretation tag with a value of YBR_FULL_422 (or variants). Usually, we want to normalize to the RGB color space when working with machine learning code. The following example code ensures that the pixel array from a color DICOM file (such as one converted from JPEG/PNG) is converted to RGB, by reading the SamplesPerPixel and PhotometricInterpretation tags and calling the pydicom utility function convert_color_space when necessary:

import pydicom
from pydicom.pixel_data_handlers.util import convert_color_space

ds = pydicom.dcmread('example.dcm')

try:
    is_color = ds.SamplesPerPixel == 3
except Exception:
    is_color = False

try:
    color_space = ds.PhotometricInterpretation
except Exception:
    color_space = ""

arr = ds.pixel_array
if is_color and color_space != "RGB":
    arr = convert_color_space(arr, color_space, "RGB")

Convert DICOM to JPEG/PNG

Converts a single DICOM file to an 8-bit image array, with one channel per window setting

# Code modified from Ian Pan https://storage.googleapis.com/kaggle-forum-message-attachments/1010629/17014/convert_to_jpeg_for_kaggle.py

def convert_ct_dicom_to_8bit(dicom_file, windows=[[350,40],[1500,-500],[120,70]], imsize=(256.,256.), should_remove_padding=True):
    '''
    Given a DICOM file, window specifications, and image size, return the
    image as a Numpy array scaled to [0, 255], resized so its longer side
    matches max(imsize).

    Parameters
    ----------
    dicom_file: str
        filename that ends in .dcm
    windows: list of [window width, window level] pairs (ints)
        each pair produces one output channel
    imsize: tuple of floats
        desired output image size
    should_remove_padding: bool
        if True, removes extra rows/columns of zeroes around the image
    '''
    array = apply_slope_intercept(dicom_file)

    if should_remove_padding:
        array = remove_padding(array)

    # different width, level for each RGB channel
    image = apply_windows(array, windows)
    # resize
    image = resize(image, imsize)

    return image

Save as jpg or png

    from PIL import Image

    image = convert_ct_dicom_to_8bit(dicom_file, windows=[[350,40],[1500,-500],[120,70]], imsize=(256.,256.), should_remove_padding=True)
    im = Image.fromarray(image)
    im.save(dicom_file[:-4] + '.jpg')
    # or
    im.save(dicom_file[:-4] + '.png')

Supporting functions

import numpy as np
import pydicom
from scipy.ndimage import zoom

# Reads the DICOM file and applies RescaleSlope/RescaleIntercept from the DICOM tags
def apply_slope_intercept(dicom_file):
    ds = pydicom.dcmread(dicom_file)
    array = ds.pixel_array.astype(np.float32)
    try:
        slope = float(ds.RescaleSlope)
        intercept = float(ds.RescaleIntercept)
    except Exception:
        slope = 1
        intercept = 0
    if slope != 1 or intercept != 0:
        array = array * slope + intercept
    return array

# Removes rows/columns of zeroes around the image
def remove_padding(array):
    array = array.copy()
    nonzeros = np.nonzero(array)
    x1, x2 = np.min(nonzeros[0]), np.max(nonzeros[0])
    y1, y2 = np.min(nonzeros[1]), np.max(nonzeros[1])
    return array[x1:x2 + 1, y1:y2 + 1]

# Apply a different W/L setting to each output channel to take advantage of RGB structure
def apply_windows(array, windows):
    layers = []
    for values in windows:
        if len(values) >= 2:
            ww = values[0]
            wl = values[1]
            layers.append(np.expand_dims(window(array, WL=wl, WW=ww), axis=-1))
    if len(layers) == 0:
        # fall back to a single abdomen window (WW 350, WL 40)
        return np.expand_dims(window(array, WL=40, WW=350), axis=-1)
    return np.concatenate(layers, axis=-1)

# Resize so the longer side matches max(imsize), preserving aspect ratio and channels
def resize(image, imsize):
    rat = max(imsize) / np.max(image.shape[:2])
    return zoom(image, [rat, rat, 1.], prefilter=False, order=1)
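
The apply_windows function and its fallback rely on a window helper that is not defined in the original snippet. A minimal sketch of standard CT windowing (clip to [WL - WW/2, WL + WW/2], then rescale to 8-bit values) is:

# Assumed helper (not part of the original snippet): clip the array to the
# window range and rescale to uint8 values in [0, 255]
def window(array, WL=40, WW=350):
    lower = WL - WW / 2.0
    upper = WL + WW / 2.0
    clipped = np.clip(array, lower, upper)
    scaled = (clipped - lower) / float(WW) * 255.0
    return scaled.astype(np.uint8)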

Common window width (WW) and window level (WL) settings for CT exams, in Hounsfield units

Target               WW     WL
brain                80     40
subdural             215    75
stroke_1             8      32
stroke_2             40     40
temporal_bone        2800   600
neck_soft_tissue     375    40
lung                 1500   -500
emphysema            800    -800
mediastinum          400    40
pulmonary_embolism   700    100
abdomen              350    40
liver                120    70
kidney               700    50
bone                 2500   480
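
For example, to window a CT slice with the brain, subdural, and bone settings from this table, pass them as [WW, WL] pairs in the windows parameter of convert_ct_dicom_to_8bit above:

windows = [[80, 40], [215, 75], [2500, 480]]  # brain, subdural, bone
image = convert_ct_dicom_to_8bit(dicom_file, windows=windows, imsize=(256., 256.))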

Convert JPG, PNG to DICOM

Converts a single jpg or png file to DICOM format. This relies on the img2dcm tool from DCMTK (and ImageMagick's convert for PNG input), so both must be installed and on the PATH.

import os
import subprocess
import tempfile
import uuid
from PIL import Image


def from_jpeg(jpeg_fp, dicom_tags={}):
    """Converts JPEG to DICOM as secondary capture, with minimal DICOM tags.
    If JPEG mode is RGBA or CMYK, we must first convert to RGB since these photometric
    interpretations have been retired in the DICOM standard:
    http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.7.6.3.html.
    Returns: path to DICOM tempfile
    """
    # If JPEG is RGBA/CMYK mode, convert to RGB mode first.
    im = Image.open(jpeg_fp)
    if im.mode in ("RGBA", "CMYK"):
        im2 = im.convert("RGB")
        im2.save(jpeg_fp)
        im2.close()
    im.close()

    dicom_fp = os.path.join(tempfile.gettempdir(), f"{str(uuid.uuid4())}.dcm")
    try:
        cmd = ["img2dcm", jpeg_fp, dicom_fp]
        for key, value in dicom_tags.items():
            cmd.extend(["-k", f"{key}={value}"])
        subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
    except Exception:
        return None
    return dicom_fp


def from_png(png_fp, dicom_tags={}):
    """Converts PNG to DICOM as secondary capture, with minimal DICOM tags.
    Returns: path to DICOM tempfile
    """
    jpeg_fp = os.path.join(tempfile.gettempdir(), f"{str(uuid.uuid4())}.jpg")
    try:
        cmd = ["convert", png_fp, jpeg_fp]
        subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
    except Exception:
        return None
    return from_jpeg(jpeg_fp, dicom_tags=dicom_tags)
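
A minimal usage sketch, assuming img2dcm is on the PATH (the filename and tag values below are illustrative):

# Hypothetical example: wrap a JPEG as secondary capture and set a couple of tags
dicom_path = from_jpeg("chest_xray.jpg", dicom_tags={"PatientName": "Anonymous", "PatientID": "12345"})
if dicom_path is not None:
    print("DICOM written to", dicom_path)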

Convert CT nifti to DICOM

Use the convert_ct function in common_utils to convert nifti files to DICOM for uploading to MD.ai. You'll need to choose your input and output directories. Optionally, you can change the plane or the default window/level settings. You'll also need a sample DICOM file, which you can download from here.

results = mdai.common_utils.convert_ct(
    input_dir=None,
    output_dir=None,
    input_ext=".nii.gz",
    plane="axial",
    sample_dicom_fp=os.path.join(os.path.dirname(""), "./sample_dicom.dcm"),
    window_center=40,
    window_width=350,
)

This function writes the converted DICOM files to the output directory and gives every image from a given nifti file the same Study and Series UIDs. Use the CLI tool to upload the newly created DICOM images into your project.

Convert DICOM to nifti

Use the dicom2nifti library from icometrix:

!pip install dicom2nifti
import dicom2nifti
dicom2nifti.convert_directory(dicom_directory, output_folder, compression=True, reorient=True)

This will convert the DICOM files in dicom_directory and write the resulting nifti files to output_folder.

Convert MR .mha files to DICOM

Use the function provided below to convert MR modality .mha files to DICOM for uploading to MD.ai. You'll need to provide the filepath to the .mha file. Optionally, you can change the plane or the default window/level settings. You'll also need a sample DICOM file, which you can download from here. This method was provided by Ramon Correa, one of our summer 2022 interns.

import os
import uuid

import numpy as np
import pydicom as pyd
from medpy.io import load

def convert_mha_file(
    filepath,
    output_dir='./processed_images',
    plane="axial",
    sample_dicom_fp='./sample_dicom.dcm',
    window_center=400,
    window_width=1000
):
    """Takes path to an mha file converts and saves as series of dicom files.
    filepath: str path to an mha file.
    output_dir:str root directory where images should be stored.
    plane: Acquisition plane  of the original image.
    sample_dicom_fp: Path to dicom image to use as reference
    window_center: Dicom Parameter  used for windowing
    window_width: Dicom Parameter used for windowing
    """
    input_ext = '.mha'
    voxel_arr,header = load(filepath)

    pixdim = header.get_voxel_spacing()

    # Image coordinates -> World coordinates
    if plane == "axial":
        slice_axis = 2
        plane_axes = [0, 1]
    elif plane == "coronal":
        slice_axis = 1
        plane_axes = [0, 2]
    elif plane == "sagittal":
        slice_axis = 0
        plane_axes = [1, 2]
    thickness = pixdim[slice_axis]
    spacing = [pixdim[plane_axes[1]], pixdim[plane_axes[0]]]

    # generate DICOM UIDs (StudyInstanceUID and SeriesInstanceUID)
    study_uid = pyd.uid.generate_uid(prefix=None)
    series_uid = pyd.uid.generate_uid(prefix=None)

    # randomized patient ID
    patient_id = str(uuid.uuid4())
    patient_name = patient_id

    scale_slope = "1"
    scale_intercept = "0"
    # create base directory named after the input file
    base_dir = os.path.join(
        output_dir, os.path.basename(filepath).replace(input_ext, "")
    )
    os.makedirs(base_dir, exist_ok=True)
    for slice_index in range(voxel_arr.shape[slice_axis]):
        # generate SOPInstanceUID
        instance_uid = pyd.uid.generate_uid(prefix=None)

        loc = slice_index * thickness

        ds = pyd.dcmread(sample_dicom_fp)

        # slice along the chosen plane and set the PixelData tag
        axes = [slice(None)] * 3
        axes[slice_axis] = slice_index
        arr = voxel_arr[tuple(axes)].T.astype(np.int16)
        ds[0x7fe00010].value = arr.tobytes()

        # modify tags
        # using code from original nifti2dcm
        # - UIDs are created by pydicom.uid.generate_uid at each level above
        # - image position is calculated by combination of slice index and slice thickness
        # - slice location is set to the value of image position along z-axis
        # - Rows/Columns determined by array shape
        # - we set slope/intercept to 1/0 since the voxel values are written directly
        ds[0x00080018].value = instance_uid  # SOPInstanceUID
        ds[0x00100010].value = patient_name
        ds[0x00100020].value = patient_id
        ds[0x0020000d].value = study_uid  # StudyInstanceUID
        ds[0x0020000e].value = series_uid  # SeriesInstanceUID
        ds[0x0008103e].value = ""  # Series Description
        ds[0x00200011].value = "1"  # Series Number
        ds[0x00200012].value = str(slice_index + 1)  # Acquisition Number
        ds[0x00200013].value = str(slice_index + 1)  # Instance Number
        ds[0x00201041].value = str(loc)  # Slice Location
        ds[0x00280010].value = arr.shape[0]  # Rows
        ds[0x00280011].value = arr.shape[1]  # Columns
        ds[0x00280030].value = spacing  # Pixel Spacing
        ds[0x00281050].value = str(window_center)  # Window Center
        ds[0x00281051].value = str(window_width)  # Window Width
        ds[0x00281052].value = str(scale_intercept)  # Rescale Intercept
        ds[0x00281053].value = str(scale_slope)  # Rescale Slope
        ds.Modality = "MR"

        # Image Position (Patient)
        # Image Orientation (Patient)
        if plane == "axial":
            ds[0x00200032].value = ["0", "0", str(loc)]
            ds[0x00200037].value = ["1", "0", "0", "0", "1", "0"]
        elif plane == "coronal":
            ds[0x00200032].value = ["0", str(loc), "0"]
            ds[0x00200037].value = ["1", "0", "0", "0", "0", "1"]
        elif plane == "sagittal":
            ds[0x00200032].value = [str(loc), "0", "0"]
            ds[0x00200037].value = ["0", "1", "0", "0", "0", "1"]

        # add new tags
        # see tag info e.g., from https://dicom.innolitics.com/ciods/nm-image/nm-reconstruction/00180050
        # Slice Thickness
        ds[0x00180050] = pyd.dataelem.DataElement(0x00180050, "DS", str(thickness))
        ds.SeriesDescription = f"MR {plane}"

        # write the slice into the base directory created above
        dicom_fp = os.path.join(base_dir, "{:03}.dcm".format(slice_index + 1))
        pyd.dcmwrite(dicom_fp, ds)
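
A minimal usage sketch (the .mha path and output directory below are illustrative):

# Hypothetical example: convert one MR .mha volume into a DICOM series
convert_mha_file(
    "brain_t1.mha",
    output_dir="./processed_images",
    plane="axial",
    sample_dicom_fp="./sample_dicom.dcm",
)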

Convert Annotations to DICOM SR

With the util class dicom_utils.SrExport(), all annotations will be converted to SR format, capturing each label's name and its parent's name (the specific Code Value of each annotation will be arbitrary). An SR file is created for each annotator in each annotated study, and its DICOM data will be consistent with the study it references. The study and annotation information comes from the annotation JSON and metadata JSON passed in (both JSONs must reference the same dataset(s) for this to work).

The labels are ordered from the exam level to the series level to the image level. Each image-level label is preceded by a "Referenced Image" section to indicate its source, and series-level labels include "Series UID: xxx" beneath them for easier referencing.

import mdai
from glob import glob

# Define your personal token and project id
personal_token = 'a1s2d3f4g4h5h59797kllh8vk'     # put your token here
project_id = 'LFdpnJGv'

# Create an mdai client
mdai_client = mdai.Client(domain='public.md.ai', access_token=personal_token)

# Download the annotation data only (all label groups)
p = mdai_client.project(project_id, path='.',  annotations_only=True)

# Download only the DICOM metadata
p = mdai_client.download_dicom_metadata(project_id, format ='json', path='.')

# Use glob to find the downloaded json files (or get them manually)
annotation_file = glob('*annotations*.json')[0]
metadata_file = glob('*dicom_metadata*.json')[0]

# Use the SrExport class to export the annotations to DICOM SR format

exporter = mdai.dicom_utils.SrExport(annotation_json=annotation_file, metadata_json=metadata_file, output_dir='out_folder')

With the util class dicom_utils.SegExport(), only local annotations will be exported. This export process converts the annotation data into a binary mask and creates the relevant segmentation DICOM data to export a DICOM Segmentation file. A Segmentation file is created for each annotator in each annotated series; additionally, if combine_label_groups is False, a separate file is created for each label group. The file's DICOM headers will be consistent with the original DICOM's. The study and annotation information comes from the annotation JSON and metadata JSON passed in (both JSONs must reference the same dataset(s) for this to work).

The segmentation frames are grouped together by their labels and within those groups, they are ordered by their source's frame number.

import mdai
from glob import glob

# Define your personal token and project id
personal_token = 'a1s2d3f4g4h5h59797kllh8vk'     # put your token here
project_id = 'LFdpnJGv'

# Create an mdai client
mdai_client = mdai.Client(domain='public.md.ai', access_token=personal_token)

# Download the annotation data only (all label groups)
p = mdai_client.project(project_id, path='.',  annotations_only=True)

# Download only the DICOM metadata
p = mdai_client.download_dicom_metadata(project_id, format ='json', path='.')

# Use glob to find the downloaded json files (or get them manually)
annotation_file = glob('*annotations*.json')[0]
metadata_file = glob('*dicom_metadata*.json')[0]

# Use the SegExport class to export the annotations to DICOM SEG format

exporter = mdai.dicom_utils.SegExport(annotation_json=annotation_file, metadata_json=metadata_file, combine_label_groups=True, output_dir='out_folder')

Convert Freeform Annotations to DICOM SEG (Legacy)

Instructions can be found here.

Support functions

Read DICOM UIDs and tags from your original files

Use this code on your original data to create a dataframe of DICOM tags. These are sample tags; add or remove values as needed.

from pathlib import Path

import pandas as pd
import pydicom

images_path = Path('MY_PATH')
filenames = list(images_path.glob('**/*.dcm'))
info = []

for f in filenames:
    d = pydicom.dcmread(str(f),stop_before_pixels=True)
    info.append({'fn':str(f),
        'StudyInstanceUID':d.StudyInstanceUID,
        'SeriesInstanceUID':d.SeriesInstanceUID,
        'SOPInstanceUID':d.SOPInstanceUID,
        'description':d.SeriesDescription if 'SeriesDescription' in d else "",
        'name':d.SequenceName if 'SequenceName' in d else "",
        'Modality':d.Modality if 'Modality' in d else "",
        'ContrastAgent':d.ContrastBolusAgent if 'ContrastBolusAgent' in d else "",
        'ScanOptions':d.ScanOptions if 'ScanOptions' in d else "",
        'WW':d.WindowWidth if 'WindowWidth' in d else "",
        'WC':d.WindowCenter if 'WindowCenter' in d else "",
        'ImageType' :d.ImageType if 'ImageType' in d else "",
        'PixelSpacing' :d.PixelSpacing if 'PixelSpacing' in d else "",
        'SliceThickness':d.SliceThickness if 'SliceThickness' in d else "",
        'PhotometricInterpretation':d.PhotometricInterpretation if 'PhotometricInterpretation' in d else ""
                  })
df = pd.DataFrame(info)
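
To keep the collected tags for later filtering, the dataframe can be written out, for example to CSV (illustrative filename):

df.to_csv('dicom_tags.csv', index=False)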