# -*- coding: utf-8 -*-
"""
This module houses all the code to convert a directory of arbitrary dicom files into nifti files, one per series
@author: abrys
"""
import dicom2nifti.compressed_dicom as compressed_dicom
import gc
import os
import re
import traceback
import unicodedata
from pydicom.tag import Tag
import logging
import dicom2nifti.common as common
import dicom2nifti.convert_dicom as convert_dicom
import dicom2nifti.settings
logger = logging.getLogger(__name__)
def convert_directory(dicom_directory, output_folder, compression=True, reorient=True):
    """
    Convert every dicom series found under a directory to a nifti file.

    The directory tree is walked recursively, every readable imaging dicom is
    grouped by its SeriesInstanceUID and each series is then converted on its own.
    Files that cannot be read and series that cannot be converted are logged and
    skipped so that the remaining ones are still processed.

    The nifti file is named after the SeriesNumber, followed by the first one
    available of SeriesDescription, SequenceName or ProtocolName. When the series
    has no SeriesNumber the SeriesInstanceUID is used instead.

    :param dicom_directory: directory with dicom files (searched recursively)
    :param output_folder: folder to write the nifti files to (it is not created here)
    :param compression: enable or disable gzip compression (.nii.gz versus .nii)
    :param reorient: reorient the dicoms according to LAS orientation
    """
    # sort dicom files by series uid
    dicom_series = {}
    for root, _, files in os.walk(dicom_directory):
        for dicom_file in files:
            file_path = os.path.join(root, dicom_file)
            try:
                if not compressed_dicom.is_dicom_file(file_path):
                    continue
                # read the dicom as fast as possible
                # (max length for SeriesInstanceUID is 64, well below the 1 KB defer_size)
                # pixel data is not stripped because these same datasets are handed
                # to the converter further down
                dicom_headers = compressed_dicom.read_file(
                    file_path,
                    defer_size="1 KB",
                    stop_before_pixels=False,
                    force=dicom2nifti.settings.pydicom_read_force)
                if not _is_valid_imaging_dicom(dicom_headers):
                    logger.info("Skipping: %s", file_path)
                    continue
                logger.info("Organizing: %s", file_path)
                dicom_series.setdefault(dicom_headers.SeriesInstanceUID, []).append(dicom_headers)
            except Exception:
                # Deliberately broad: one unreadable file must not stop the others.
                # KeyboardInterrupt and SystemExit are not caught so the run can be aborted.
                logger.warning("Unable to read: %s", file_path, exc_info=True)

    # start converting one by one
    for dicom_input in dicom_series.values():
        base_filename = ""
        try:
            # construct the filename for the nifti from the first dicom of the series
            first_header = dicom_input[0]
            # NOTE(review): the fallback to the SeriesInstanceUID is assumed to apply
            # only when there is no SeriesNumber - confirm against the upstream source
            if 'SeriesNumber' in first_header:
                base_filename = _remove_accents('%s' % first_header.SeriesNumber)
                # append the first descriptive field that is available
                for keyword in ('SeriesDescription', 'SequenceName', 'ProtocolName'):
                    if keyword in first_header:
                        base_filename = _remove_accents('%s_%s' % (base_filename,
                                                                   getattr(first_header, keyword)))
                        break
            else:
                base_filename = _remove_accents(first_header.SeriesInstanceUID)

            logger.info('--------------------------------------------')
            logger.info('Start converting %s', base_filename)
            # NOTE(review): the path depends on base_filename only, so two series that
            # produce the same name presumably end up in the same file - verify
            extension = '.nii.gz' if compression else '.nii'
            nifti_file = os.path.join(output_folder, base_filename + extension)
            convert_dicom.dicom_array_to_nifti(dicom_input, nifti_file, reorient)
            gc.collect()
        except Exception:
            # Deliberately broad: one failing series must not stop the others.
            logger.error("Unable to convert: %s", base_filename, exc_info=True)
def _is_valid_imaging_dicom(dicom_header):
    """
    Do some basic checks to see if this is a valid imaging dicom

    Philips and Siemens multiframe dicoms are accepted without further checks.
    Every other dicom needs a SeriesInstanceUID, an InstanceNumber, an
    ImageOrientationPatient with at least 6 values and an ImagePositionPatient
    with at least 3 values.

    :param dicom_header: the dicom dataset to check
    :return: True if the dicom passes all the checks, False otherwise
    """
    try:
        # if it is a philips or siemens multiframe dicom then we assume it is ok
        if common.is_philips([dicom_header]) or common.is_siemens([dicom_header]):
            if common.is_multiframe_dicom([dicom_header]):
                return True

        if "SeriesInstanceUID" not in dicom_header:
            return False

        if "InstanceNumber" not in dicom_header:
            return False

        if "ImageOrientationPatient" not in dicom_header or len(dicom_header.ImageOrientationPatient) < 6:
            return False

        if "ImagePositionPatient" not in dicom_header or len(dicom_header.ImagePositionPatient) < 3:
            return False

        # (0020,0037) is the ImageOrientationPatient tag, checked here by tag number
        # in addition to the keyword lookup above
        if Tag(0x0020, 0x0037) not in dicom_header:
            return False

        return True
    except (KeyError, AttributeError, TypeError):
        # TypeError: len() fails when an element is present but holds no value
        # (for example None); such a dicom is treated as not valid
        return False
def _remove_accents(unicode_filename):
"""
Function that will try to remove accents from a unicode string to be used in a filename.
input filename should be either an ascii or unicode string
"""
# noinspection PyBroadException
try:
unicode_filename = unicode_filename.replace(" ", "_")
cleaned_filename = unicodedata.normalize('NFKD', unicode_filename).encode('ASCII', 'ignore').decode('ASCII')
cleaned_filename = re.sub(r'[^\w\s-]', '', cleaned_filename.strip().lower())
cleaned_filename = re.sub(r'[-\s]+', '-', cleaned_filename)
return cleaned_filename
except:
traceback.print_exc()
return unicode_filename
def _remove_accents_(unicode_filename):
"""
Function that will try to remove accents from a unicode string to be used in a filename.
input filename should be either an ascii or unicode string
"""
valid_characters = bytes(b'-_.() 1234567890abcdefghijklmnopqrstuvwxyz')
cleaned_filename = unicodedata.normalize('NFKD', unicode_filename).encode('ASCII', 'ignore')
new_filename = ""
for char_int in bytes(cleaned_filename):
char_byte = bytes([char_int])
if char_byte in valid_characters:
new_filename += char_byte.decode()
return new_filename