Convert DICOM
Working with DICOM converted from JPEG/PNG
When JPEG or PNG images are uploaded to an MD.ai DICOM dataset, each image is wrapped as a DICOM file with the appropriate DICOM tags. Optionally, these images can be specified using a filename schema that defines hierarchical relationships (exam/series/image). When this dataset is exported, the files are DICOM, not the original JPEG/PNG images. The original JPEG/PNG image filenames can be found in the ImageComments tag.
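For example, a minimal sketch for recovering the original filename from a converted file (the path converted.dcm is a placeholder):

import pydicom

# Read only the header; pixel data is not needed to inspect ImageComments.
ds = pydicom.dcmread("converted.dcm", stop_before_pixels=True)
original_filename = ds.ImageComments if "ImageComments" in ds else None
print(original_filename)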
If you would like to use the converted DICOM directly, care must be taken to use the correct color space, since JPEG uses YCbCr, which is specified by DICOM in the PhotometricInterpretation tag with a value of YBR_FULL_422 (or variants). Usually, we want to normalize to the RGB color space when working with machine learning code. The following example code ensures that the resulting pixel array from a color DICOM (such as one converted from JPEG/PNG) is converted to RGB, by reading the SamplesPerPixel and PhotometricInterpretation tags and calling the pydicom utility function convert_color_space if necessary:
import pydicom
from pydicom.pixel_data_handlers.util import convert_color_space

ds = pydicom.dcmread('example.dcm')

try:
    is_color = ds.SamplesPerPixel == 3
except Exception:
    is_color = False

try:
    color_space = ds.PhotometricInterpretation
except Exception:
    color_space = ""

arr = ds.pixel_array
if is_color and color_space != "RGB":
    arr = convert_color_space(arr, color_space, "RGB")
Convert DICOM to JPEG/PNG
Converts a single DICOM file to an 8-bit format.
# Code modified from Ian Pan https://storage.googleapis.com/kaggle-forum-message-attachments/1010629/17014/convert_to_jpeg_for_kaggle.py
def convert_ct_dicom_to_8bit(dicom_file, windows=[[350, 40], [1500, -500], [120, 70]], imsize=(256., 256.), should_remove_padding=True):
    '''
    Given a DICOM file, window specifications, and image size, return the
    image as a Numpy array scaled to [0, 255] of the specified size.

    Parameters
    ----------
    dicom_file: str
        filename that ends in .dcm
    windows: list of lists of ints
        list of [window width, window level] pairs, one per output channel
    imsize: tuple of float
        desired output image size
    should_remove_padding: bool
        if True, removes extra rows/columns of zeroes around the image
    '''
    array = apply_slope_intercept(dicom_file)
    if should_remove_padding:
        array = remove_padding(array)
    # apply a different width/level to each RGB channel
    image = apply_windows(array, windows)
    # resize to the requested output size
    image = resize(image, imsize)
    return image
Save as jpg or png
from PIL import Image

image = convert_ct_dicom_to_8bit(dicom_file, windows=[[350, 40], [1500, -500], [120, 70]], imsize=(256., 256.), should_remove_padding=True)
im = Image.fromarray(image)
im.save(dicom_file[:-4] + '.jpg')  # strip the '.dcm' extension and save as JPEG
# or
im.save(dicom_file[:-4] + '.png')
Supporting functions
import pandas as pd
import numpy as np
import pydicom
from scipy.ndimage import zoom

# Applies the rescale slope and intercept from the DICOM tags
def apply_slope_intercept(dicom_file):
    # dicom_file is a path to a .dcm file (see the convert_ct_dicom_to_8bit docstring)
    ds = pydicom.dcmread(dicom_file)
    array = ds.pixel_array.copy().astype(np.float32)
    try:
        slope = float(ds.RescaleSlope)
        intercept = float(ds.RescaleIntercept)
    except Exception:
        slope = 1
        intercept = 0
    if slope != 1 or intercept != 0:
        array = array * slope
        array = array + intercept
    return array

# Removes rows/columns of zeroes around the image
def remove_padding(array):
    array = array.copy()
    nonzeros = np.nonzero(array)
    x1 = np.min(nonzeros[0]); x2 = np.max(nonzeros[0])
    y1 = np.min(nonzeros[1]); y2 = np.max(nonzeros[1])
    return array[x1:x2 + 1, y1:y2 + 1]

# Apply different W/L settings to different channels to take advantage of RGB structure
def apply_windows(array, windows):
    layers = []
    for values in windows:
        if len(values) >= 2:
            ww = values[0]
            wl = values[1]
            layers.append(np.expand_dims(window(array, WL=wl, WW=ww), axis=-1))
    if len(layers) == 0:
        # fall back to a single abdomen window (WW 350 / WL 40)
        return np.expand_dims(window(array, WL=40, WW=350), axis=-1)
    else:
        return np.concatenate(layers, axis=-1)

# Resize the (rows, cols, channels) image so its longest side matches imsize
def resize(image, imsize):
    rat = max(imsize) / np.max(image.shape[:2])
    return zoom(image, [rat, rat, 1.], prefilter=False, order=1)
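The functions above also call a window helper that is not defined in this section. A minimal sketch of such a helper (an assumption, not the original author's implementation) that clips the array to the window defined by WL and WW and rescales the result to [0, 255]:

# Assumed helper (not part of the original snippet): clip to [WL - WW/2, WL + WW/2]
# and linearly rescale to [0, 255] as uint8.
def window(array, WL=40, WW=350):
    lower = WL - WW / 2
    upper = WL + WW / 2
    arr = np.clip(array, lower, upper)
    arr = (arr - lower) / (upper - lower) * 255.0
    return arr.astype(np.uint8)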
Common window width (WW) and window level (WL) values for CT exams, in Hounsfield units
Target | WW | WL |
---|---|---|
brain | 80 | 40 |
subdural | 215 | 75 |
stroke_1 | 8 | 32 |
stroke_2 | 40 | 40 |
temporal_bone | 2800 | 600 |
neck_soft_tissue | 375 | 40 |
lung | 1500 | -500 |
emphysema | 800 | -800 |
mediastinum | 400 | 40 |
pulmonary_embolism | 700 | 100 |
abdomen | 350 | 40 |
liver | 120 | 70 |
kidney | 700 | 50 |
bone | 2500 | 480 |
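For example, for a head CT you might pass the brain, subdural, and stroke_1 windows from the table to convert_ct_dicom_to_8bit (the filename head_ct.dcm is a placeholder):

# [WW, WL] pairs taken from the table above: brain, subdural, stroke_1
head_windows = [[80, 40], [215, 75], [8, 32]]
image = convert_ct_dicom_to_8bit('head_ct.dcm', windows=head_windows)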
Convert JPG, PNG to DICOM
Converts a single JPG or PNG file to DICOM format.
import os
import subprocess
import tempfile
import uuid

from PIL import Image
from pydicom.uid import generate_uid


def from_jpeg(jpeg_fp, dicom_tags={}):
    """Converts JPEG to DICOM as secondary capture, with minimal DICOM tags.

    If JPEG mode is RGBA or CMYK, we must first convert to RGB since these photometric
    interpretations have been retired in the DICOM standard:
    http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.7.6.3.html.

    Returns: path to DICOM tempfile
    """
    # If JPEG is RGBA/CMYK mode, convert to RGB mode first.
    im = Image.open(jpeg_fp)
    if im.mode in ("RGBA", "CMYK"):
        im2 = im.convert("RGB")
        im2.save(jpeg_fp)
        im2.close()
    im.close()

    dicom_fp = os.path.join(tempfile.gettempdir(), f"{str(uuid.uuid4())}.dcm")
    try:
        # img2dcm is part of the DCMTK toolkit and must be available on the PATH
        cmd = ["img2dcm", jpeg_fp, dicom_fp]
        for key, value in dicom_tags.items():
            cmd.extend(["-k", f"{key}={value}"])
        subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
    except Exception:
        return None
    return dicom_fp


def from_png(png_fp, dicom_tags={}):
    """Converts PNG to DICOM as secondary capture, with minimal DICOM tags.

    Returns: path to DICOM tempfile
    """
    jpeg_fp = os.path.join(tempfile.gettempdir(), f"{str(uuid.uuid4())}.jpg")
    try:
        # convert is the ImageMagick CLI and must be available on the PATH
        cmd = ["convert", png_fp, jpeg_fp]
        subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
    except Exception:
        return None
    return from_jpeg(jpeg_fp, dicom_tags=dicom_tags)
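A hypothetical usage sketch, assuming a local chest.png and the DCMTK img2dcm and ImageMagick convert binaries on the PATH (the file name and tag values are placeholders):

# Placeholder file name and tag values
dicom_path = from_png("chest.png", dicom_tags={"PatientName": "Anonymous", "PatientID": "12345"})
if dicom_path is not None:
    print(f"DICOM written to {dicom_path}")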
Convert CT nifti to DICOM
Use the convert_ct function in common_utils to convert nifti files to DICOM for uploading to MD.ai. You'll need to choose your input and output directories. Optionally, you can change the plane or default window/level settings. You'll also need a sample dicom, which you can download from here.
results = mdai.common_utils.convert_ct(
    input_dir=None,
    output_dir=None,
    input_ext=".nii.gz",
    plane="axial",
    sample_dicom_fp=os.path.join(os.path.dirname(""), "./sample_dicom.dcm"),
    window_center=40,
    window_width=350,
)
This function will write the converted DICOM files to the output directory and will give every image derived from the same nifti file the same Study and Series UIDs. Use the CLI tool to upload the newly created DICOM images into your project.
Convert DICOM to nifti
Use the dicom2nifti library from icometrix:
import dicom2nifti
dicom2nifti.convert_directory(dicom_directory, output_folder, compression=True, reorient=True)
This will convert the dicom files from the dicom_directory to the output_folder.
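If you only need a single series converted into one output file, dicom2nifti also provides dicom_series_to_nifti; a sketch with placeholder paths:

import dicom2nifti

# Convert one DICOM series directory into a single NIfTI file (placeholder paths)
dicom2nifti.dicom_series_to_nifti("path/to/series_dir", "output/series.nii.gz", reorient_nifti=True)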
Convert MR .mha files to DICOM
Use the function provided below to convert MR modality .mha files to DICOM for uploading to MD.ai. You'll need to provide the filepath to the MHA file. Optionally, you can change the plane or default window/level settings. You'll also need a sample dicom file which you can download from here. This method was provided by Ramon Correa, one of our summer 2022 interns.
import os
import uuid

import numpy as np
import pydicom as pyd
from medpy.io import load


def convert_mha_file(
    filepath,
    output_dir='./processed_images',
    plane="axial",
    sample_dicom_fp='./sample_dicom.dcm',
    window_center=400,
    window_width=1000
):
    """Takes the path to an mha file, converts it, and saves it as a series of dicom files.

    filepath: str, path to an mha file.
    output_dir: str, root directory where images should be stored.
    plane: acquisition plane of the original image ("axial", "coronal", or "sagittal").
    sample_dicom_fp: path to a dicom image to use as reference.
    window_center: DICOM parameter used for windowing.
    window_width: DICOM parameter used for windowing.
    """
    input_ext = '.mha'
    voxel_arr, header = load(filepath)
    pixdim = header.get_voxel_spacing()

    # Image coordinates -> World coordinates
    if plane == "axial":
        slice_axis = 2
        plane_axes = [0, 1]
    elif plane == "coronal":
        slice_axis = 1
        plane_axes = [0, 2]
    elif plane == "sagittal":
        slice_axis = 0
        plane_axes = [1, 2]
    else:
        raise ValueError(f"Unsupported plane: {plane}")

    thickness = pixdim[slice_axis]
    spacing = [pixdim[plane_axes[1]], pixdim[plane_axes[0]]]

    # generate DICOM UIDs (StudyInstanceUID and SeriesInstanceUID)
    study_uid = pyd.uid.generate_uid(prefix=None)
    series_uid = pyd.uid.generate_uid(prefix=None)

    # randomized patient ID (uses the standard uuid module)
    patient_id = str(uuid.uuid4())
    patient_name = patient_id

    scale_slope = "1"
    scale_intercept = "0"

    # create base directory
    base_dir = os.path.join(output_dir, os.path.basename(filepath).replace(input_ext, ""))
    os.makedirs(base_dir, exist_ok=True)

    for slice_index in range(voxel_arr.shape[slice_axis]):
        # generate SOPInstanceUID
        instance_uid = pyd.uid.generate_uid(prefix=None)
        loc = slice_index * thickness

        ds = pyd.dcmread(sample_dicom_fp)

        # slice along the chosen axis and set the PixelData tag
        axes = [slice(None)] * 3
        axes[slice_axis] = slice_index
        arr = voxel_arr[tuple(axes)].T.astype(np.int16)
        ds[0x7fe00010].value = arr.tobytes()

        # modify tags
        # using code from original nifti2dcm
        # - UIDs are created by pydicom.uid.generate_uid at each level above
        # - image position is calculated from the slice index and slice thickness
        # - slice location is set to the value of image position along the slice axis
        # - Rows/Columns determined by array shape
        # - slope/intercept are set to 1/0 since the source pixel values are written directly
        ds[0x00080018].value = instance_uid  # SOPInstanceUID
        ds[0x00100010].value = patient_name
        ds[0x00100020].value = patient_id
        ds[0x0020000d].value = study_uid  # StudyInstanceUID
        ds[0x0020000e].value = series_uid  # SeriesInstanceUID
        ds[0x0008103e].value = ""  # Series Description
        ds[0x00200011].value = "1"  # Series Number
        ds[0x00200012].value = str(slice_index + 1)  # Acquisition Number
        ds[0x00200013].value = str(slice_index + 1)  # Instance Number
        ds[0x00201041].value = str(loc)  # Slice Location
        ds[0x00280010].value = arr.shape[0]  # Rows
        ds[0x00280011].value = arr.shape[1]  # Columns
        ds[0x00280030].value = spacing  # Pixel Spacing
        ds[0x00281050].value = str(window_center)  # Window Center
        ds[0x00281051].value = str(window_width)  # Window Width
        ds[0x00281052].value = str(scale_intercept)  # Rescale Intercept
        ds[0x00281053].value = str(scale_slope)  # Rescale Slope
        ds.Modality = "MR"

        # Image Position (Patient) and Image Orientation (Patient)
        if plane == "axial":
            ds[0x00200032].value = ["0", "0", str(loc)]
            ds[0x00200037].value = ["1", "0", "0", "0", "1", "0"]
        elif plane == "coronal":
            ds[0x00200032].value = ["0", str(loc), "0"]
            ds[0x00200037].value = ["1", "0", "0", "0", "0", "1"]
        elif plane == "sagittal":
            ds[0x00200032].value = [str(loc), "0", "0"]
            ds[0x00200037].value = ["0", "1", "0", "0", "0", "1"]

        # add new tags
        # see tag info e.g., from https://dicom.innolitics.com/ciods/nm-image/nm-reconstruction/00180050
        # Slice Thickness
        ds[0x00180050] = pyd.dataelem.DataElement(0x00180050, "DS", str(thickness))
        ds.SeriesDescription = f"MR {plane}"

        dicom_fp = os.path.join(base_dir, "{:03}.dcm".format(slice_index + 1))
        pyd.dcmwrite(dicom_fp, ds)
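A hypothetical call, with placeholder paths for the MHA file and the sample DICOM:

# Placeholder paths; writes one DICOM per slice under ./processed_images/example_mr/
convert_mha_file(
    "example_mr.mha",
    output_dir="./processed_images",
    plane="axial",
    sample_dicom_fp="./sample_dicom.dcm",
)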
Convert Annotations to DICOM SR
With the util class dicom_utils.SrExport(), all annotations will be converted, capturing each label's name and its parent's name in SR format (the specific Code Value of each annotation will be arbitrary). An SR file will be created for each annotator in each annotated study. The SR's DICOM data will be consistent with the study it references. The study and annotation information comes from the provided annotation JSON and metadata JSON (these JSONs must reference the same dataset(s) for the export to work).
The labels will be ordered from exam level to series level to image level. Each image-level label will have a "Referenced Image" section preceding the label to indicate its source. Series-level labels will also have "Series UID: xxx" under them for easier referencing.
import mdai
from glob import glob
# Define your personal token and project id
personal_token = 'a1s2d3f4g4h5h59797kllh8vk' # put your token here
project_id = 'LFdpnJGv'
# Create an mdai client
mdai_client = mdai.Client(domain='public.md.ai', access_token=personal_token)
# Download the annotation data only (all label groups)
p = mdai_client.project(project_id, path='.', annotations_only=True)
# Download only the DICOM metadata
p = mdai_client.download_dicom_metadata(project_id, format='json', path='.')
# Use glob to find the downloaded json files (or get them manually)
annotation_file = glob('*annotations*.json')[0]
metadata_file = glob('*dicom_metadata*.json')[0]
# Use the SrExport class to export the annotations to DICOM SR format
exporter = mdai.dicom_utils.SrExport(annotation_json=annotation_file, metadata_json=metadata_file, output_dir='out_folder')
Convert Annotations to DICOM SEG (Recommended)
With the util class dicom_utils.SegExport(), only local annotations will be exported. This export process converts the annotation data into a binary mask and creates the relevant segmentation DICOM data to export a DICOM Segmentation file. A Segmentation file will be created for each annotator in each annotated series. Additionally, if combine_label_groups is False, a separate file will be created for each label group. The file's DICOM headers will be consistent with the original DICOM files'. The study and annotation information comes from the provided annotation JSON and metadata JSON (these JSONs must reference the same dataset(s) for the export to work).
The segmentation frames are grouped together by their labels, and within those groups they are ordered by their source's frame number.
import mdai
from glob import glob
# Define your personal token and project id
personal_token = 'a1s2d3f4g4h5h59797kllh8vk' # put your token here
project_id = 'LFdpnJGv'
# Create an mdai client
mdai_client = mdai.Client(domain='public.md.ai', access_token=personal_token)
# Download the annotation data only (all label groups)
p = mdai_client.project(project_id, path='.', annotations_only=True)
# Download only the DICOM metadata
p = mdai_client.download_dicom_metadata(project_id, format='json', path='.')
# Use glob to find the downloaded json files (or get them manually)
annotation_file = glob('*annotations*.json')[0]
metadata_file = glob('*dicom_metadata*.json')[0]
# Use the SegExport class to export the annotations to DICOM SEG format
exporter = mdai.dicom_utils.SegExport(annotation_json=annotation_file, metadata_json=metadata_file, combine_label_groups=True, output_dir='out_folder')
Convert Freeform Annotations to DICOM SEG (Legacy)
Instructions can be found here.
Support functions
Read DICOM UIDs and tags from your original files
Use this code on your original data to create a dataframe of DICOM tags. These are sample tags; you can add or remove values as needed.
from pathlib import Path

import pandas as pd
import pydicom

images_path = Path('MY_PATH')
filenames = list(images_path.glob('**/*.dcm'))

info = []
for f in filenames:
    # read the header only; pixel data is not needed for tag extraction
    d = pydicom.dcmread(str(f), stop_before_pixels=True)
    info.append({'fn': str(f),
                 'StudyInstanceUID': d.StudyInstanceUID,
                 'SeriesInstanceUID': d.SeriesInstanceUID,
                 'SOPInstanceUID': d.SOPInstanceUID,
                 'description': d.SeriesDescription if 'SeriesDescription' in d else "",
                 'name': d.SequenceName if 'SequenceName' in d else "",
                 'Modality': d.Modality if 'Modality' in d else "",
                 'ContrastAgent': d.ContrastBolusAgent if 'ContrastBolusAgent' in d else "",
                 'ScanOptions': d.ScanOptions if 'ScanOptions' in d else "",
                 'WW': d.WindowWidth if 'WindowWidth' in d else "",
                 'WC': d.WindowCenter if 'WindowCenter' in d else "",
                 'ImageType': d.ImageType if 'ImageType' in d else "",
                 'PixelSpacing': d.PixelSpacing if 'PixelSpacing' in d else "",
                 'SliceThickness': d.SliceThickness if 'SliceThickness' in d else "",
                 'PhotometricInterpretation': d.PhotometricInterpretation if 'PhotometricInterpretation' in d else ""
                 })

df = pd.DataFrame(info)
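You can then persist the dataframe for later reference, for example as a CSV (the output filename is a placeholder):

# Placeholder output filename
df.to_csv('dicom_tags.csv', index=False)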