Convert DICOM
Working with DICOM converted from JPEG/PNG
When JPEG or PNG images are uploaded to an MD.ai DICOM dataset, each image is wrapped as a DICOM file with the appropriate DICOM tags. Optionally, these images can be specified using a filename schema that defines hierarchical relationships (exam/series/image). When this dataset is exported, the files are DICOM, not the original JPEG/PNG images. The original JPEG/PNG image filenames can be found in the ImageComments
tag.
If you would like to use the converted DICOM directly, care must be taken to use the correct color space, since JPEG uses YCbCr, which is specified by DICOM in the PhotometricInterpretation
tag with a value of YBR_FULL_422
(or variants). Usually, we want to normalize to the RGB color space when working with machine learning code. The following example code ensures that the resulting pixel array from color DICOM (such as those converted from JPEG/PNG) is converted to RGB, by reading the SamplesPerPixel
and PhotometricInterpretation
tags and calling the pydicom
utility function convert_color_space
if necessary:
import pydicom
from pydicom.pixel_data_handlers.util import convert_color_space
# Read the DICOM file whose pixel data should end up in RGB.
ds = pydicom.dcmread('example.dcm')

# A missing SamplesPerPixel tag means "not a 3-sample color image"; a missing
# PhotometricInterpretation tag is treated as an unknown ("") color space.
# getattr defaults replace the previous blanket `except Exception`, so only
# an absent tag is tolerated and genuine read errors are no longer hidden.
is_color = getattr(ds, "SamplesPerPixel", None) == 3
color_space = getattr(ds, "PhotometricInterpretation", "")

arr = ds.pixel_array

# Only color images that are not already RGB need to be converted.
if is_color and color_space != "RGB":
    arr = convert_color_space(arr, color_space, "RGB")
Convert DICOM to JPEG/PNG
Converts a single dicom file to an 8 bit format
# Code modified from Ian Pan https://storage.googleapis.com/kaggle-forum-message-attachments/1010629/17014/convert_to_jpeg_for_kaggle.py
def convert_ct_dicom_to_8bit(dicom_file, windows=None, imsize=(256., 256.), should_remove_padding=True):
    '''
    Given a DICOM, window specifications, and image size, return the
    windowed image as a Numpy array resized towards the requested size.

    The pipeline is: apply_slope_intercept -> (optional) remove_padding ->
    apply_windows -> resize. The scaling of the output values is whatever
    apply_windows produces (intended to be 8 bit, [0,255]).

    Parameters
    ----------
    dicom_file: pydicom dataset, str or os.PathLike
        Either an already loaded dataset (anything exposing ``pixel_array``
        and, optionally, ``RescaleSlope``/``RescaleIntercept``) or the path
        of a DICOM file, which is then read with ``pydicom.dcmread``.
    windows: list of lists of ints, optional
        list of [window width, window level] pairs, one per output channel.
        Defaults to [[350,40],[1500,-500],[120,70]].
    imsize: tuple of float
        desired output image size
    should_remove_padding: bool
        if True will remove extra rows/columns of zeroes around the image

    Returns
    -------
    The array returned by resize().
    '''
    import os

    # Build the default per call instead of sharing one mutable default list.
    if windows is None:
        windows = [[350, 40], [1500, -500], [120, 70]]

    # The helpers below need a dataset; accept a filename as well, as the
    # parameter name suggests.
    if isinstance(dicom_file, (str, os.PathLike)):
        import pydicom
        dicom_file = pydicom.dcmread(dicom_file)

    array = apply_slope_intercept(dicom_file)
    if should_remove_padding:
        array = remove_padding(array)
    # different width, level for each RGB channel
    image = apply_windows(array, windows)
    # resize
    image = resize(image, imsize)
    return image
Save as jpg or png
import os

# `dicom_file` is the path of the source DICOM file (e.g. "scan.dcm").
# The converter's helpers read `.pixel_array`, so load the dataset first.
ds = pydicom.dcmread(dicom_file)
image = convert_ct_dicom_to_8bit(ds, windows=[[350, 40], [1500, -500], [120, 70]], imsize=(256., 256.), should_remove_padding=True)
im = Image.fromarray(image)

# Drop the extension to build the output name. The previous `[-4:]` slice
# kept ONLY the last four characters (".dcm"), yielding ".dcm.jpg".
output_stem = os.path.splitext(dicom_file)[0]
im.save(output_stem + '.jpg')
# or
im.save(output_stem + '.png')
Supporting functions
import pandas as pd
import numpy as np
import pydicom
from scipy.ndimage.interpolation import zoom
# Applies slope and intercept from DICOM tags
def apply_slope_intercept(dicom_file):
    '''
    Return a copy of the dataset's pixel array with the rescale applied.

    Parameters
    ----------
    dicom_file: object exposing ``pixel_array``
        ``RescaleSlope`` and ``RescaleIntercept`` are read when available.

    Returns
    -------
    ``pixel_array * slope + intercept``. When either tag cannot be read as
    a float the identity transform (slope 1, intercept 0) is used, and for
    the identity transform the untouched copy is returned as-is.
    '''
    pixels = dicom_file.pixel_array.copy()
    try:
        slope, intercept = float(dicom_file.RescaleSlope), float(dicom_file.RescaleIntercept)
    except Exception:
        # Missing or unparsable tags: fall back to the identity transform.
        slope, intercept = 1, 0
    if slope == 1 and intercept == 0:
        return pixels
    pixels = pixels * slope
    return pixels + intercept
# Removes zeroes around image
def remove_padding(array):
    '''
    Crop a copy of `array` to the bounding box of its non-zero entries.

    Only the first two axes are cropped; any further axes are kept whole.

    Parameters
    ----------
    array: numpy array with at least two dimensions

    Returns
    -------
    The cropped copy. An array without any non-zero entry has no bounding
    box and is returned uncropped (as a copy).
    '''
    array = array.copy()
    nonzeros = np.nonzero(array)
    # All zeroes: np.min/np.max on an empty index array would raise.
    if nonzeros[0].size == 0:
        return array
    x1, x2 = np.min(nonzeros[0]), np.max(nonzeros[0])
    y1, y2 = np.min(nonzeros[1]), np.max(nonzeros[1])
    # Slice ends are exclusive, so +1 keeps the last non-zero row/column
    # (the previous x1:x2 / y1:y2 slice silently dropped them).
    return array[x1:x2 + 1, y1:y2 + 1]
# Apply different W/L settings to different channels to take advantage of RGB structure
def apply_windows(array, windows):
    '''
    Window `array` once per [width, level] pair and stack the results as
    channels along a new last axis.

    Parameters
    ----------
    array: numpy array
        pixel data to window (a single 2-D slice or a 3-D stack)
    windows: iterable of sequences
        each entry is [window width, window level]; entries with fewer than
        two values are skipped

    Returns
    -------
    Array with one extra trailing axis holding one channel per valid
    window. When no valid window is given, a single channel using
    WW=350 / WL=40 is returned.

    NOTE(review): relies on a `window(array, WL=..., WW=...)` helper that is
    not defined alongside this function - make sure it is in scope.
    '''
    layers = []
    for values in windows:
        # was `len(value)`: an undefined name that raised NameError
        if len(values) >= 2:
            ww, wl = values[0], values[1]
            # axis=-1 equals the previous axis=3 for 3-D input and also
            # works for a single 2-D slice.
            layers.append(np.expand_dims(window(array, WL=wl, WW=ww), axis=-1))
    if not layers:
        # Fallback window. The previous call had width and level swapped
        # (WL=350, WW=40), contradicting the [width, level] convention.
        return np.expand_dims(window(array, WL=40, WW=350), axis=-1)
    return np.concatenate(layers, axis=-1)
# Resize
def resize(image, imsize):
    '''
    Rescale the two spatial axes of a channels-last image by one common
    factor (linear interpolation), preserving the aspect ratio.

    Parameters
    ----------
    image: numpy array
        (..., rows, columns, channels); e.g. a single (H, W, C) image or an
        (N, H, W, C) stack. Leading axes and the channel axis are not scaled.
    imsize: tuple of float
        desired output size; only max(imsize) is used

    Returns
    -------
    The zoomed array. The factor is max(imsize) divided by the largest of
    the last three axis lengths, so the result is not padded or cropped to
    exactly `imsize`.

    Raises
    ------
    ValueError
        if `image` has fewer than three dimensions
    '''
    # scipy.ndimage.interpolation is a deprecated alias namespace; import
    # zoom from its public location so this works on current SciPy too.
    from scipy.ndimage import zoom

    if image.ndim < 3:
        raise ValueError("resize expects a channels-last array with at least 3 dimensions")
    # For 4-D input this is identical to the previous shape[1:] / [1, r, r, 1].
    rat = max(imsize) / np.max(image.shape[-3:])
    factors = [1.] * (image.ndim - 3) + [rat, rat, 1.]
    return zoom(image, factors, prefilter=False, order=1)
Common window width and window level for CT exams
Target | WW | WL |
---|---|---|
brain | 80 | 40 |
subdural | 215 | 75 |
stroke_1 | 8 | 32 |
stroke_2 | 40 | 40 |
temporal_bone | 2800 | 600 |
neck_soft_tissue | 375 | 40 |
lung | 1500 | -500 |
emphysema | 800 | -800 |
mediastinum | 400 | 40 |
pulmonary_embolism | 700 | 100 |
abdomen | 350 | 40 |
liver | 120 | 70 |
kidney | 700 | 50 |
bone | 2500 | 480 |
Convert JPG, PNG to DICOM
Converts a single jpg or png file to dicom format
import os
import subprocess
import tempfile
import uuid
from PIL import Image
from pydicom.uid import generate_uid
def from_jpeg(jpeg_fp, dicom_tags=None):
    """Converts JPEG to DICOM as secondary capture, with minimal DICOM tags.

    If JPEG mode is RGBA or CMYK, we must first convert to RGB since these photometric
    interpretations have been retired in the DICOM standard:
    http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.7.6.3.html.
    In that case the file at `jpeg_fp` is overwritten in place with its RGB version.

    The conversion itself is delegated to the external `img2dcm` command.

    Args:
        jpeg_fp: path to the JPEG file.
        dicom_tags: optional mapping; each item is passed to `img2dcm` as
            `-k key=value`.

    Returns: path to DICOM tempfile, or None if running `img2dcm` failed.
    """
    # Avoid a shared mutable default argument.
    if dicom_tags is None:
        dicom_tags = {}

    # If JPEG is RGBA/CMYK mode, convert to RGB mode first.
    # The context manager closes the source image even when convert() raises.
    with Image.open(jpeg_fp) as im:
        rgb = im.convert("RGB") if im.mode in ("RGBA", "CMYK") else None
    if rgb is not None:
        try:
            rgb.save(jpeg_fp)
        finally:
            rgb.close()

    dicom_fp = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4()}.dcm")

    try:
        cmd = ["img2dcm", jpeg_fp, dicom_fp]
        for key, value in dicom_tags.items():
            cmd.extend(["-k", f"{key}={value}"])
        subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
    except Exception:
        # Best effort by design: callers detect failure through None.
        return None

    return dicom_fp
def from_png(png_fp, dicom_tags=None):
    """Converts PNG to DICOM as secondary capture, with minimal DICOM tags.

    The PNG is first turned into a temporary JPEG with the external
    `convert` command and then handed to `from_jpeg`.

    Args:
        png_fp: path to the PNG file.
        dicom_tags: optional mapping forwarded to `from_jpeg`.

    Returns: path to DICOM tempfile, or None if a conversion step failed.
    """
    # Avoid a shared mutable default argument; always forward a real dict.
    if dicom_tags is None:
        dicom_tags = {}

    jpeg_fp = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4()}.jpg")

    try:
        try:
            cmd = ["convert", png_fp, jpeg_fp]
            subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
        except Exception:
            # Best effort by design: callers detect failure through None.
            return None

        return from_jpeg(jpeg_fp, dicom_tags=dicom_tags)
    finally:
        # The intermediate JPEG is only needed during the conversion; remove
        # it so repeated calls do not pile up files in the temp directory.
        try:
            os.remove(jpeg_fp)
        except OSError:
            pass
Convert CT nifti to DICOM
Use the convert_ct
function in common_utils
to convert nifti files to DICOM for uploading to MD.ai. You'll need to choose your input and output directories. Optionally, you can change the plane or default window/level settings. You'll also need a sample dicom which you can download from here
# Example call converting CT nifti files to DICOM.
# input_dir / output_dir are left as None placeholders here: choose your own
# input and output directories before running.
# plane, window_center and window_width are the optional settings mentioned
# in the text above; sample_dicom_fp points at the downloaded sample DICOM.
results = mdai.common_utils.convert_ct(
input_dir=None,
output_dir=None,
input_ext=".nii.gz",
plane="axial",
sample_dicom_fp=os.path.join(os.path.dirname(""), "./sample_dicom.dcm"),
window_center=40,
window_width=350,
)
This function will write the converted DICOM files to the output directory and will give each image from a nifti file the same Study and Series UIDs. Use the CLI tool to upload the newly created DICOM images into your project
Convert DICOM to nifti
Use the dicom2nifti library from icometrix
import dicom2nifti
# dicom_directory and output_folder are paths you provide: the DICOM files
# found in dicom_directory are converted and written to output_folder.
# NOTE(review): compression / reorient semantics are defined by dicom2nifti -
# check its documentation before changing them.
dicom2nifti.convert_directory(dicom_directory, output_folder, compression=True, reorient=True)
This will convert the dicom files from the dicom_directory to the output_folder.
Convert MR .mha files to DICOM
Use the function provided below to convert MR modality .mha
files to DICOM for uploading to MD.ai. You'll need to provide the filepath to the MHA file. Optionally, you can change the plane or default window/level settings. You'll also need a sample dicom file which you can download from here. This method was provided by Ramon Correa, one of our summer 2022 interns.
def convert_mha_file(
    filepath,
    output_dir='./processed_images',
    plane="axial",
    sample_dicom_fp='./sample_dicom.dcm',
    window_center=400,
    window_width=1000
):
    """Takes path to an mha file, converts it and saves it as a series of dicom files.

    One DICOM file per slice is written to
    `<output_dir>/<mha file name without .mha>/NNN.dcm` (NNN starts at 001).
    All slices share one freshly generated Study/Series UID and one random
    patient ID; every slice gets its own SOPInstanceUID.

    filepath: str path to an mha file.
    output_dir: str root directory where images should be stored.
    plane: Acquisition plane of the original image: "axial", "coronal" or
        "sagittal". It selects the volume axis that is sliced (2, 1 or 0),
        the slice thickness / pixel spacing entries and the image
        position/orientation tags.
    sample_dicom_fp: Path to dicom image to use as reference. It is re-read
        for every slice and must already contain the elements that are
        overwritten below (PixelData, UIDs, Rows/Columns, window and rescale
        tags, image position/orientation, ...).
    window_center: Dicom Parameter used for windowing
    window_width: Dicom Parameter used for windowing

    Raises ValueError for an unknown `plane`.

    NOTE(review): `load` is not defined in this function; it is presumably
    `medpy.io.load` (returns the voxel array and a header offering
    `get_voxel_spacing()`) - confirm it is imported. `np` and `os` are
    expected from the module-level imports.
    NOTE(review): pixel data is written as int16; this assumes the sample
    DICOM declares a matching 16-bit signed pixel format - confirm.
    """
    # Local imports: the body uses the `pyd` alias and `uuid`.
    import uuid

    import pydicom as pyd

    input_ext = '.mha'

    # Image coordinates -> World coordinates
    # plane -> (axis that is sliced, the two in-plane axes)
    plane_layout = {
        "axial": (2, [0, 1]),
        "coronal": (1, [0, 2]),
        "sagittal": (0, [1, 2]),
    }
    if plane not in plane_layout:
        raise ValueError(
            f"Unknown plane {plane!r}; expected one of {sorted(plane_layout)}"
        )
    slice_axis, plane_axes = plane_layout[plane]

    voxel_arr, header = load(filepath)
    pixdim = header.get_voxel_spacing()

    thickness = pixdim[slice_axis]
    spacing = [pixdim[plane_axes[1]], pixdim[plane_axes[0]]]

    # generate DICOM UIDs (StudyInstanceUID and SeriesInstanceUID)
    study_uid = pyd.uid.generate_uid(prefix=None)
    series_uid = pyd.uid.generate_uid(prefix=None)

    # randomized patient ID (was `uid.uuid.uuid4()`, an undefined name)
    patient_id = str(uuid.uuid4())
    patient_name = patient_id

    scale_slope = "1"
    scale_intercept = "0"

    # create base directory (computed once; every slice is written into it)
    base_dir = os.path.join(
        output_dir,
        os.path.basename(filepath).replace(input_ext, ""),
    )
    os.makedirs(base_dir, exist_ok=True)

    # Iterate along the axis selected by `plane`. Previously the last axis was
    # always used, which only matched the tags written below for "axial".
    for slice_index in range(voxel_arr.shape[slice_axis]):
        # generate SOPInstanceUID
        instance_uid = pyd.uid.generate_uid(prefix=None)

        loc = slice_index * thickness

        # fresh copy of the reference dataset for every slice
        ds = pyd.dcmread(sample_dicom_fp)

        # delete tags (skipped when the sample does not carry them)
        for tag in (
            0x00200052,  # Frame of Reference UID
            0x00201040,  # Position Reference Indicator
        ):
            if tag in ds:
                del ds[tag]

        # slice and set PixelData tag
        axes = [slice(None)] * 3
        axes[slice_axis] = slice_index
        arr = voxel_arr[tuple(axes)].T.astype(np.int16)
        ds[0x7fe00010].value = arr.tobytes()

        # modify tags
        # using code from original nifti2dcm
        # - UIDs are created by pydicom.uid.generate_uid at each level above
        # - image position is calculated by combination of slice index and slice thickness
        # - slice location is set to the value of image position along the slice axis
        # - Rows/Columns determined by array shape
        # - slope/intercept are 1/0 since the voxel values are stored unchanged
        ds[0x00080018].value = instance_uid  # SOPInstanceUID
        ds[0x00100010].value = patient_name
        ds[0x00100020].value = patient_id
        ds[0x0020000d].value = study_uid  # StudyInstanceUID
        ds[0x0020000e].value = series_uid  # SeriesInstanceUID
        ds[0x0008103e].value = ""  # Series Description
        ds[0x00200011].value = "1"  # Series Number
        ds[0x00200012].value = str(slice_index + 1)  # Acquisition Number
        ds[0x00200013].value = str(slice_index + 1)  # Instance Number
        ds[0x00201041].value = str(loc)  # Slice Location
        ds[0x00280010].value = arr.shape[0]  # Rows
        ds[0x00280011].value = arr.shape[1]  # Columns
        ds[0x00280030].value = spacing  # Pixel Spacing
        ds[0x00281050].value = str(window_center)  # Window Center
        ds[0x00281051].value = str(window_width)  # Window Width
        ds[0x00281052].value = str(scale_intercept)  # Rescale Intercept
        ds[0x00281053].value = str(scale_slope)  # Rescale Slope
        ds.Modality = "MR"

        # Image Position (Patient)
        # Image Orientation (Patient)
        if plane == "axial":
            ds[0x00200032].value = ["0", "0", str(loc)]
            ds[0x00200037].value = ["1", "0", "0", "0", "1", "0"]
        elif plane == "coronal":
            ds[0x00200032].value = ["0", str(loc), "0"]
            ds[0x00200037].value = ["1", "0", "0", "0", "0", "1"]
        elif plane == "sagittal":
            ds[0x00200032].value = [str(loc), "0", "0"]
            ds[0x00200037].value = ["0", "1", "0", "0", "0", "1"]

        # add new tags
        # see tag info e.g., from https://dicom.innolitics.com/ciods/nm-image/nm-reconstruction/00180050
        # Slice Thickness
        ds[0x00180050] = pyd.dataelem.DataElement(0x00180050, "DS", str(thickness))
        # overrides the empty Series Description set through the tag above
        ds.SeriesDescription = f"MR {plane}"

        dicom_fp = os.path.join(base_dir, "{:03}.dcm".format(slice_index + 1))
        pyd.dcmwrite(dicom_fp, ds)
Convert Freeform Annotations to DICOM SEG
Instructions can be found here.
Support functions
Read DICOM UIDs and tags from your original files
Use this code on your original data to create a dataframe of DICOM tags. These are sample tags, you can add or remove values, as needed.
from pathlib import Path
import pydicom
# Folder that is searched recursively for *.dcm files.
images_path = Path('MY_PATH')
filenames = list(images_path.glob('**/*.dcm'))

# Output column -> DICOM keyword, for tags that may be missing from a file.
# A missing tag is recorded as an empty string. Add or remove entries as needed.
optional_tags = {
    'description': 'SeriesDescription',
    'name': 'SequenceName',
    'Modality': 'Modality',
    'ContrastAgent': 'ContrastBolusAgent',
    'ScanOptions': 'ScanOptions',
    'WW': 'WindowWidth',
    'WC': 'WindowCenter',
    'ImageType': 'ImageType',
    'PixelSpacing': 'PixelSpacing',
    'SliceThickness': 'SliceThickness',
    'PhotometricInterpretation': 'PhotometricInterpretation',
}

info = []
for f in filenames:
    # Header only: the pixel data is not needed for a tag listing.
    d = pydicom.dcmread(str(f), stop_before_pixels=True)
    # The file name and the three UIDs are read unconditionally.
    row = {
        'fn': str(f),
        'StudyInstanceUID': d.StudyInstanceUID,
        'SeriesInstanceUID': d.SeriesInstanceUID,
        'SOPInstanceUID': d.SOPInstanceUID,
    }
    for column, keyword in optional_tags.items():
        row[column] = getattr(d, keyword) if keyword in d else ""
    info.append(row)

# One row per DICOM file, one column per collected tag.
df = pd.DataFrame(info)