Divide and rule#

Sometimes, programs become very long and hard to read. We speak of spaghetti code.

from skimage.io import imread
from skimage.morphology import white_tophat, disk
from skimage.filters import gaussian, threshold_otsu
from skimage.measure import label, regionprops_table
import pandas as pd
import numpy as np
image = imread("../../data/blobs.tif")
footprint = disk(15)
background_subtracted = white_tophat(image, 
                                     footprint=footprint)
particle_radius = 5
denoised = gaussian(background_subtracted, 
                    sigma=particle_radius)
binary = denoised > threshold_otsu(denoised)
labels = label(binary)
requested_measurements = ["label", "area", "mean_intensity"]
regionprops = regionprops_table(image, 
                                labels, 
                                properties=requested_measurements)
table = pd.DataFrame(regionprops)
mean_total_intensity = np.mean(table["area"] * table["mean_intensity"])
mean_total_intensity
17136.90322580645

A common and easy way for making such code easier to read is to split it into sections which start with a comment each.

# configuration
file_to_analyze = "../../data/blobs.tif"
background_subtraction_radius = 15
particle_radius = 5
requested_measurements = ["area", "mean_intensity"]

# load data
image = imread(file_to_analyze)

# preprocess image
footprint = disk(background_subtraction_radius)
background_subtracted = white_tophat(image, 
                                     footprint=footprint)
denoised = gaussian(background_subtracted, 
                    sigma=particle_radius)

# segment image
binary = denoised > threshold_otsu(denoised)
labels = label(binary)

# extract features
regionprops = regionprops_table(image, 
                                labels, 
                                properties=requested_measurements)
table = pd.DataFrame(regionprops)

# descriptive statistics
mean_total_intensity = np.mean(table["area"] * table["mean_intensity"])
mean_total_intensity
17136.90322580645

More professional would be to put all this code into meaningful sub-routines and call them from a central function.

# reusable functions
def preprocess_image(image, background_subtraction_radius, particle_radius):
    """Apply background removal and denoising"""
    footprint = disk(background_subtraction_radius)
    background_subtracted = white_tophat(image, footprint=footprint)
    denoised = gaussian(background_subtracted, sigma=particle_radius)
    return denoised

def segment_image(image):
    """Apply thresholding and connected component analysis"""
    binary = image > threshold_otsu(image)
    labels = label(binary)
    return labels

def extract_features(image, labels, requested_measurements):
    """Measure specified properties"""
    regionprops = regionprops_table(image, 
                                    labels, 
                                    properties=requested_measurements)
    table = pd.DataFrame(regionprops)
    return table

def analyse_average_total_intensity(filename, 
                                    background_subtraction_radius = 15, 
                                    particle_radius = 5):
    """Load an image, segment objects and measure their mean total intensity."""
    image = imread(filename)
    denoised = preprocess_image(image, 
                                background_subtraction_radius, 
                                particle_radius)
    labels = segment_image(denoised)
    requested_measurements = ["area", "mean_intensity"]
    table = extract_features(image, 
                             labels, 
                             requested_measurements)

    # descriptive statistics
    mean_total_intensity = np.mean(table["area"] * table["mean_intensity"])
    
    return mean_total_intensity
# configuration
file_to_analyze = "../../data/blobs.tif"

This central function can then be read easily; it has just 6 lines of code

analyse_average_total_intensity(file_to_analyze)
17136.90322580645

This function can then be reused also for other image files.

analyse_average_total_intensity("../../data/BBBC007_batch/20P1_POS0005_D_1UL.tif")
884.2620087336245