Skip to content

Commit

Permalink
Add support for arbitrary columns and fix bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
kostrykin committed Sep 24, 2024
1 parent 44ccfb9 commit 135f95c
Show file tree
Hide file tree
Showing 6 changed files with 120 additions and 25 deletions.
79 changes: 59 additions & 20 deletions tools/points2binaryimage/points2binaryimage.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,75 @@
import argparse
import os
import warnings
from typing import List

import numpy as np
import pandas as pd
import scipy.ndimage as ndi
import skimage.io


def points2binaryimage(point_file, out_file, shape=[500, 500], has_header=False, swap_xy=False):
def find_column(df: pd.DataFrame, candidates: List[str]) -> str:
    """
    Return the single column name that is present in both `df` and `candidates`.

    Args:
        df: Data frame whose column names are searched.
        candidates: Accepted spellings of the column name (e.g. lower and upper case).

    Returns:
        The one candidate that is a column of `df`.

    Raises:
        KeyError: If there is no candidate column name present in `df`, or more than one.
    """
    intersection = frozenset(df.columns) & frozenset(candidates)
    if len(intersection) == 0:
        raise KeyError(f'No such column: {", ".join(candidates)}')
    elif len(intersection) > 1:
        # Sort the names so that the message does not depend on set iteration order.
        raise KeyError(f'The column names {", ".join(sorted(intersection))} are ambiguous')
    else:
        return next(iter(intersection))


def points2binaryimage(point_file, out_file, shape, has_header=False, swap_xy=False, bg_value=0, fg_value=0xffff):

img = np.full(shape, dtype=np.uint16, fill_value=bg_value)
if os.path.exists(point_file) and os.path.getsize(point_file) > 0:

# Read the tabular file with information from the header
if has_header:
df = pd.read_csv(point_file, skiprows=1, header=None, delimiter="\t")
df = pd.read_csv(point_file, delimiter='\t')
pos_x_column = find_column(df, ['pos_x', 'POS_X'])
pos_y_column = find_column(df, ['pos_y', 'POS_Y'])
pos_x_list = df[pos_x_column].round().astype(int)
pos_y_list = df[pos_y_column].round().astype(int)
assert len(pos_x_list) == len(pos_y_list)
try:
radius_column = find_column(df, ['radius', 'RADIUS'])
radius_list = df[radius_column]
except KeyError:
radius_list = [0] * len(pos_x_list)

# Read the tabular file without header
else:
df = pd.read_csv(point_file, header=None, delimiter="\t")

for i in range(0, len(df)):
a_row = df.iloc[i]
if int(a_row[0]) < 0 or int(a_row[1]) < 0:
raise IndexError("Point {},{} is out of image with bounds {},{}.".format(int(a_row[0]), int(a_row[1]), shape[0], shape[1]))

if swap_xy:
if img.shape[0] <= int(a_row[0]) or img.shape[1] <= int(a_row[1]):
raise IndexError("Point {},{} is out of image with bounds {},{}.".format(int(a_row[0]), int(a_row[1]), shape[0], shape[1]))
else:
img[int(a_row[1]), int(a_row[0])] = 32767
df = pd.read_csv(point_file, header=None, delimiter='\t')
pos_x_list = df[0].round().astype(int)
pos_y_list = df[1].round().astype(int)
assert len(pos_x_list) == len(pos_y_list)
radius_list = [0] * len(pos_x_list)

# Optionally swap the coordinates
if swap_xy:
pos_x_list, pos_y_list = pos_y_list, pos_x_list

# Perform the rasterization
for y, x, radius in zip(pos_y_list, pos_x_list, radius_list):

if y < 0 or x < 0 or y >= shape[0] or x >= shape[1]:
raise IndexError(f'The point x={x}, y={y} exceeds the bounds of the image (width: {shape[1]}, height: {shape[0]})')

if radius > 0:
mask = np.ones(shape, dtype=bool)
mask[y, x] = False
mask = (ndi.distance_transform_edt(mask) <= radius)
img[mask] = fg_value
else:
if img.shape[0] <= int(a_row[1]) or img.shape[1] <= int(a_row[0]):
raise IndexError("Point {},{} is out of image with bounds {},{}.".format(int(a_row[1]), int(a_row[0]), shape[0], shape[1]))
else:
img[int(a_row[0]), int(a_row[1])] = 32767
img[y, x] = fg_value

else:
raise Exception("{} is empty or does not exist.".format(point_file)) # appropriate built-in error?

Expand All @@ -51,4 +90,4 @@ def points2binaryimage(point_file, out_file, shape=[500, 500], has_header=False,
args = parser.parse_args()

# TOOL
points2binaryimage(args.point_file.name, args.out_file, [args.shapey, args.shapex], has_header=args.has_header, swap_xy=args.swap_xy)
points2binaryimage(args.point_file.name, args.out_file, (args.shapey, args.shapex), has_header=args.has_header, swap_xy=args.swap_xy)
27 changes: 22 additions & 5 deletions tools/points2binaryimage/points2binaryimage.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
<macros>
<import>creators.xml</import>
<import>tests.xml</import>
<token name="@TOOL_VERSION@">0.2</token>
<token name="@VERSION_SUFFIX@">3</token>
<token name="@TOOL_VERSION@">0.3</token>
<token name="@VERSION_SUFFIX@">0</token>
</macros>
<creator>
<expand macro="creators/bmcv" />
Expand Down Expand Up @@ -46,22 +46,39 @@
<!-- TSV without header -->
<test>
<param name="input" value="input1.tsv" />
<param name="shapex" value="20" />
<param name="shapey" value="30" />
<param name="shapex" value="30" />
<param name="shapey" value="20" />
<param name="has_header" value="false" />
<param name="swap_xy" value="true" />
<expand macro="tests/binary_image_diff" name="output" value="output1.tif" ftype="tiff" />
</test>
<!-- TSV with header -->
<test>
<param name="input" value="input2.tsv" />
<param name="shapex" value="205" />
<param name="shapey" value="84" />
<param name="has_header" value="true" />
<param name="swap_xy" value="false" />
<expand macro="tests/binary_image_diff" name="output" value="output2.tif" ftype="tiff" />
</test>
</tests>
<help>

**Converts a tabular list of points to a binary image by rasterizing the point coordinates.**

The created image is a single-channel image with 16 bits per pixel (unsigned integer).
The points are rasterized with value 32767 (white).
The points are rasterized with value 65535 (white).
Pixels not corresponding to any points in the tabular file are assigned the value 0 (black).

The tabular list of points can either be header-less or have a header row.
In this case, the first and second columns are expected to be the X and Y coordinates, respectively.
Otherwise, if a header is present, it is searched for the following column names:

- ``pos_x`` or ``POS_X``: This column corresponds to the X coordinates.
- ``pos_y`` or ``POS_Y``: This column corresponds to the Y coordinates.
- If a ``radius`` or ``RADIUS`` column is present,
then the points will be rasterized as circles of the corresponding radii.

</help>
<citations>
<citation type="doi">10.1016/j.jbiotec.2017.07.019</citation>
Expand Down
39 changes: 39 additions & 0 deletions tools/points2binaryimage/test-data/input2.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
frame pos_x pos_y scale radius intensity
1 85 32 1.33 3.77 18807.73
1 190 25 1.78 5.03 24581.44
1 137 26 1.44 4.09 19037.59
1 63 42 1.44 4.09 22390.80
1 107 44 1.33 3.77 23429.96
1 61 27 1.56 4.40 18052.18
1 158 39 1.44 4.09 18377.02
1 190 14 1.33 3.77 18548.86
1 182 33 1.78 5.03 26467.79
1 39 39 1.44 4.09 14782.43
1 169 26 1.33 3.77 14203.41
1 61 54 1.33 3.77 23248.06
1 95 52 1.33 3.77 21480.71
1 23 60 1.89 5.34 25203.43
1 84 24 1.56 4.40 16630.57
1 121 47 1.67 4.71 15459.11
1 66 49 1.11 3.14 23858.07
1 115 36 2.00 5.66 16389.10
1 55 51 1.33 3.77 23548.90
1 130 72 1.67 4.71 15769.02
1 117 23 1.33 3.77 16763.14
1 45 52 1.56 4.40 22877.61
1 36 71 1.56 4.40 20780.96
1 78 17 1.33 3.77 16844.51
1 101 38 1.56 4.40 21376.59
1 147 31 1.78 5.03 16597.14
1 163 55 2.00 5.66 18301.54
1 164 23 1.33 3.77 17073.82
1 150 24 1.56 4.40 15440.02
1 151 67 1.78 5.03 18419.96
1 26 53 2.00 5.66 20586.01
1 79 62 1.33 3.77 15232.88
1 69 17 1.11 3.14 15601.83
1 83 52 1.33 3.77 18315.00
1 16 54 2.00 5.66 22140.66
1 166 61 1.78 5.03 18488.78
1 163 43 1.44 4.09 16925.49
1 130 53 1.78 5.03 15101.96
Binary file not shown.
Binary file modified tools/points2binaryimage/test-data/output1.tif
Binary file not shown.
Binary file added tools/points2binaryimage/test-data/output2.tif
Binary file not shown.

0 comments on commit 135f95c

Please sign in to comment.