# -*- coding: utf-8 -*-
# ELEKTRONN2 Toolkit
# Copyright (c) 2015 Marius Killinger
# All rights reserved
from __future__ import absolute_import, division, print_function
from builtins import filter, hex, input, int, map, next, oct, pow, range, super, zip
__all__ = ['AgentData', 'BatchCreatorImage', 'GridData']
import gc
import logging
import os
import sys
import time
import getpass
from . import non_geometric_augmentation as nga
try:
from importlib import reload
except ImportError:
pass # reload() is built-in in Python 2
import h5py
import numpy as np
import tqdm
from . import transformations
from .image import make_affinities, make_nhood_targets
from .. import utils
from ..config import config
logger = logging.getLogger('elektronn2log')
inspection_logger = logging.getLogger('elektronn2log-inspection')
user_name = getpass.getuser()
###############################################################################
def greyAugment(d, channels, rng):
    """
    Performs grey value (histogram) augmentations on ``d``. This is only
    applied to ``channels`` (list of channels indices), ``rng`` is a random
    number generator
    """
    if channels == []:
        return d
    n_ch = len(channels)
    # Work on a copy: the incoming array is typically a view into the
    # original data cube and must not be modified.
    out = d.copy()
    # Draw the three per-channel factors in the same order as before:
    # contrast scale, additive shift (shadows/lights), then gamma.
    contrast = 1 + (rng.rand(n_ch) - 0.5) * 0.3
    shift = (rng.rand(n_ch) - 0.5) * 0.3
    gamma = 2.0 ** (rng.rand(n_ch) * 2 - 1)  # log-uniform in [0.5, 2]
    sel = out[channels] * contrast[:, None, None] + shift[:, None, None]
    sel = np.clip(sel, 0, 1)
    out[channels] = sel ** gamma[:, None, None]
    return out
def border_treatment(data_list, ps, border_mode, ndim):
    """
    Bring every array in ``data_list`` to the patch size ``ps`` along its
    trailing spatial axes. ``border_mode`` selects the strategy:
    'keep' (unchanged), 'crop' (central crop), 'mirror' (symmetric pad)
    or '0-pad' (constant zero pad). The leading channel axis (and, for
    ``ndim == 2``, the axis after it) is never touched.
    """
    def _treat(arr):
        if border_mode == 'keep':
            return arr
        spatial = arr.shape[1:]  # (z,y,x)/(x,y)
        if border_mode == 'crop':
            # Centred crop: drop half the excess on each side.
            off = [int((have - want) // 2) for have, want in zip(spatial, ps)]
            if ndim == 3:
                return arr[:,
                           off[0]:off[0] + ps[0],
                           off[1]:off[1] + ps[1],
                           off[2]:off[2] + ps[2]]
            elif ndim == 2:
                return arr[:, :,
                           off[0]:off[0] + ps[0],
                           off[1]:off[1] + ps[1]]
            return arr
        # Padding modes: distribute the deficit, extra voxel goes left.
        grow_l = [int(np.ceil(float(want - have) / 2))
                  for want, have in zip(ps, spatial)]
        grow_r = [int(np.floor(float(want - have) / 2))
                  for want, have in zip(ps, spatial)]
        if ndim == 3:
            pad = [(0, 0),
                   (grow_l[0], grow_r[0]),
                   (grow_l[1], grow_r[1]),
                   (grow_l[2], grow_r[2])]
        else:
            pad = [(0, 0),
                   (0, 0),
                   (grow_l[0], grow_r[0]),
                   (grow_l[1], grow_r[1])]
        if border_mode == 'mirror':
            arr = np.pad(arr, pad, mode='symmetric')
        if border_mode == '0-pad':
            arr = np.pad(arr, pad, mode='constant', constant_values=0)
        return arr

    return [_treat(arr) for arr in data_list]
class BatchCreatorImage(object):
    """
    Creates (optionally warped and augmented) training/validation batches
    from image data cubes and corresponding label cubes that are loaded
    from HDF5 files (see :py:meth:`load_data` / :py:meth:`read_files`).
    Geometric information (patch size, strides, offsets) is inferred from
    the given input/target graph nodes.
    """
    def __init__(self, input_node, target_node=None, d_path=None, l_path=None,
                 d_files=None, l_files=None, cube_prios=None, valid_cubes=None,
                 border_mode='crop', aniso_factor=2, target_vec_ix=None,
                 target_discrete_ix=None, h5stream=False, zxy=True):
        assert (d_path and l_path and d_files and l_files)
        if len(d_files) != len(l_files):
            raise ValueError(
                "d_files and l_files must be lists of same length!")
        d_path = os.path.expanduser(d_path)
        l_path = os.path.expanduser(l_path)
        self.zxy = zxy
        self.h5stream = h5stream
        self.d_path = d_path
        self.l_path = l_path
        self.d_files = d_files
        self.l_files = l_files
        self.cube_prios = cube_prios
        self.valid_cubes = valid_cubes if valid_cubes is not None else []
        self.aniso_factor = aniso_factor
        self.border_mode = border_mode
        self.target_vec_ix = target_vec_ix
        self.target_discrete_ix = target_discrete_ix
        # Infer geometric info from input/target shapes.
        # NOTE: ``np.int`` was removed in NumPy >= 1.24; it was an alias for
        # the builtin ``int``, so using ``int`` here is behaviour-identical.
        self.ndim = input_node.shape.ndim
        self.patch_size = np.array(input_node.shape.spatial_shape, dtype=int)
        self.strides = np.array(target_node.shape.strides, dtype=int)
        self.offsets = np.array(target_node.shape.offsets, dtype=int)
        self.target_ps = self.patch_size - self.offsets * 2
        self.t_dtype = target_node.output.dtype
        if input_node.shape.ndim == target_node.shape.ndim:
            self.mode = 'img-img'
        else:
            if target_node.shape.ndim > 1:
                # TODO: would it work to map 'img-vect'?
                raise ValueError("Target ndim must equal input ndim "
                                 "('img-img') or be 1 ('img-scalar').")
            self.mode = 'img-scalar'
        # The following will be inferred when reading data
        self.n_labelled_pixel = 0
        self.n_f = None    # number of channels/feature in input
        self.t_n_f = None  # the shape of the returned label batch at index 1
        # Actual data fields
        self.valid_d = []
        self.valid_l = []
        self.valid_extra = []
        self.train_d = []
        self.train_l = []
        self.train_extra = []
        # Setup internal stuff: seed from the fractional part of scaled time,
        # folded into the uint32 range.
        self.rng = np.random.RandomState(
            np.uint32((time.time() * 0.0001 -
                       int(time.time() * 0.0001)) * 4294967295))
        self.pid = os.getpid()
        self.gc_count = 1
        self._sampling_weight = None
        self._training_count = None
        self._valid_count = None
        self.n_successful_warp = 0
        self.n_failed_warp = 0
        self.load_data()
        logger.info('{}\n'.format(self.__repr__()))
def __repr__(self):
s = "{0:,d}-target Data Set with {1:,d} input channel(s):\n" + \
"#train cubes: {2:,d} and #valid cubes: {3:,d}, {4:,d} labelled " + \
"pixels."
s = s.format(self.t_n_f, self.n_f, self._training_count,
self._valid_count, self.n_labelled_pixel)
return s
@property
def warp_stats(self):
return "Warp stats: successful: %i, failed %i, quota: %.1f" % (
self.n_successful_warp, self.n_failed_warp,
float(self.n_successful_warp) / (
self.n_failed_warp + self.n_successful_warp))
def _reseed(self):
"""Reseeds the rng if the process ID has changed!"""
current_pid = os.getpid()
if current_pid != self.pid:
self.pid = current_pid
self.rng.seed(np.uint32((time.time() * 0.0001 -
int(
time.time() * 0.0001)) * 4294967295 + self.pid))
reload(
transformations) # needed for numba GIL released stuff to work
logger.debug(
"Reseeding RNG in Process with PID: {}".format(self.pid))
def _allocbatch(self, batch_size):
images = np.zeros((batch_size, self.n_f,) + tuple(self.patch_size),
dtype='float32')
sh = self.patch_size - self.offsets * 2
target = np.zeros((batch_size, self.t_n_f) + tuple(sh),
dtype=self.t_dtype)
return images, target
    def getbatch(self, batch_size=1, source='train',
                 grey_augment_channels=None, warp=False, warp_args=None,
                 ignore_thresh=False, force_dense=False,
                 affinities=False, nhood_targets=False, ret_ll_mask=False,
                 nga_blur_noise_probability=False,
                 blurry_blobs_probability=False,
                 blurry_blobs_args=None,
                 noisy_random_erasing_probability=0,
                 noisy_random_erasing_args=None,
                 uniform_random_erasing_probability=0,
                 uniform_random_erasing_args=None):
        """
        Prepares a batch by randomly sampling, shifting and augmenting
        patches from the data

        Parameters
        ----------
        batch_size: int
            Number of examples in batch (for CNNs often just 1)
        source: str
            Data set to draw data from: 'train'/'valid'
        grey_augment_channels: list
            List of channel indices to apply grey-value augmentation to
        warp: bool or float
            Whether warping/distortion augmentations are applied to examples
            (slow --> use multiprocessing). If this is a float number,
            warping is applied to this fraction of examples e.g. 0.5 --> every
            other example.
        warp_args: dict
            Additional keyword arguments that get passed through to
            elektronn2.data.transformations.get_warped_slice()
        ignore_thresh: float
            If the fraction of negative targets in an example patch exceeds this
            threshold, this example is discarded (Negative targets are ignored
            for training [but could be used for unsupervised target propagation]).
        force_dense: bool
            If True the targets are *not* sub-sampled according to the CNN output\
            strides. Dense targets requires MFP in the CNN!
        affinities
        nhood_targets
        ret_ll_mask: bool
            If True additional information for each batch example is returned.
            Currently implemented are two ll_mask arrays to indicate the targeting mode.
            The first dimension of those arrays is the batch_size!
        nga_blur_noise_probability: bool or float
            Probability of applying a Gaussian filter and noise to the input data.
            The value must be a bool or be within the range [0.0, 1.0]
            Default: False (disabled)
        blurry_blobs_probability: bool or float
            Probability of augmenting the input data with blurry "blobs".
            "Blobs" mean random cubes across the input data. The region
            within a cube is blurred with Gaussian smoothing. The level of smoothing
            and the position of each cube are random.
            The value must be a bool or be within the range [0.0, 1.0]
            Default: False (disabled)
        blurry_blobs_args: dict
            Additional keyword arguments that get passed through to
            elektronn2.data.non_geometric_augmentation.blurry_blobs().
            Effective if setting blurry_blobs_probability > 0.
        noisy_random_erasing_probability: float
            Probability of augmenting the input data with noise blobs. The blob
            region is filled with random noise values between 0 and 255.
        noisy_random_erasing_args: dict
            Additional keyword arguments that get passed through to
            elektronn2.data.non_geometric_augmentation.noisy_random_erasing().
            Effective if setting noisy_random_erasing_probability > 0.
        uniform_random_erasing_probability: float
            Probability of augmenting the input data with uniform blobs.
            The blob region is filled with one value.
        uniform_random_erasing_args: dict
            Additional keyword arguments that get passed through to
            elektronn2.data.non_geometric_augmentation.uniform_random_erasing().
            Effective if uniform_random_erasing_probability > 0.

        Returns
        -------
        data: np.ndarray
            [bs, ch, x, y] or [bs, ch, z, y, x] for 2D and 3D CNNS
        target: np.ndarray
            [bs, ch, x, y] or [bs, ch, z, y, x]
        ll_mask1: np.ndarray
            (optional) [bs, n_target]
        ll_mask2: np.ndarray
            (optional) [bs, n_target]
        """
        # This is especially required for multiprocessing
        if grey_augment_channels is None:
            grey_augment_channels = []
        self._reseed()
        images, target = self._allocbatch(batch_size)
        ll_masks = []
        patch_count = 0
        while patch_count < batch_size:  # Loop to fill up batch with examples
            d, t, ll_mask = self._getcube(source)  # get cube randomly
            try:
                # In 'img-scalar' mode there is no target image to warp.
                if self.mode == 'img-img':
                    d, t = self.warp_cut(d, t, warp, warp_args)
                else:
                    d, _ = self.warp_cut(d, None, warp, warp_args)
                self.n_successful_warp += 1
            except transformations.WarpingOOBError:
                # Sampled patch fell (partially) outside the cube:
                # count the failure and retry with a new random position.
                self.n_failed_warp += 1
                continue
            # Check only if a ignore_thresh is set and the cube is labelled
            if (ignore_thresh is not False) and (not np.any(ll_mask[1])):
                if (t < 0).mean() > ignore_thresh:
                    continue  # do not use cubes which have no information
            if source == "train":  # no grey/non-geometric augmentation for testing
                d = greyAugment(d, grey_augment_channels, self.rng)
                # NOTE(review): the augmentations below draw from the global
                # np.random instead of self.rng -- presumably intentional,
                # but it bypasses the per-process reseeding; verify.
                if nga_blur_noise_probability:
                    random_value = np.random.rand()
                    if random_value < nga_blur_noise_probability:
                        # NOTE(review): with data_overwrite=False the return
                        # value of blur_augment is discarded, so this call
                        # appears to have no effect -- confirm intent.
                        nga.blur_augment(d, level=1, data_overwrite=False)
                        nga.noise_augment(d, level=0.15, data_overwrite=True)
                if blurry_blobs_probability > 0\
                        and np.random.rand() < blurry_blobs_probability:
                    d = nga.blurry_blobs(data=d, **blurry_blobs_args)
                if uniform_random_erasing_probability > 0\
                        and np.random.rand() < uniform_random_erasing_probability:
                    d = nga.uniform_random_erasing(data=d,
                                                   **uniform_random_erasing_args)
                if noisy_random_erasing_probability > 0\
                        and np.random.rand() < noisy_random_erasing_probability:
                    d = nga.noisy_random_erasing(data=d,
                                                 **noisy_random_erasing_args)
            target[patch_count] = t
            images[patch_count] = d
            ll_masks.append(ll_mask)
            patch_count += 1
        # Patch / Nhood target stuff
        if nhood_targets is not None and nhood_targets is not False:
            if isinstance(nhood_targets, np.ndarray):
                nhood = nhood_targets
            else:
                # Default neighbourhood: centre voxel plus its 6 face
                # neighbours.
                # NOTE(review): np.int was removed in NumPy >= 1.24; this
                # needs to become the builtin int.
                nhood = np.array([[0, 0, 0],
                                  [1, 0, 0],
                                  [-1, 0, 0],
                                  [0, 1, 0],
                                  [0, -1, 0],
                                  [0, 0, 1],
                                  [0, 0, -1]], dtype=np.int)
            if target.shape[1] == 1:
                target = make_nhood_targets(target, nhood)
            else:  # Nhood targets for first feature only, keep other targets
                tmp = make_nhood_targets(target[:, 0:1], nhood)
                target = np.concatenate([tmp, target[:, 1:]], axis=1)
        # Final modification of targets: striding and replacing nan
        if not (force_dense or np.all(self.strides == 1)):
            target = self._stridedtargets(target)
        ret = [images, target]  # The "normal" batch
        # MALIS stuff
        if affinities == 'malis':
            # create aff from seg IDs (i.e. shape[1] must be 1)
            aff, seg = make_affinities(target[:, 0])  # [bs, 3, z, y, x]
            seg = seg[:, None]  # add 'class' dimension
            ret = [images, aff, seg]
        elif affinities == 'affinity':
            aff, seg = make_affinities(target)  # [bs, z, y, x, 3]
            ret = [images, aff]
        # Lazy Labels stuff
        if ret_ll_mask:  # ll_mask is now a list(bs) of tuples(2)
            ll_masks = np.atleast_3d(
                np.array(ll_masks, dtype=np.int16))  # (bs, 2, 5)
            ll_mask1 = ll_masks[:, 0]
            ll_mask2 = ll_masks[:, 1]
            ret += [ll_mask1, ll_mask2]
        target[
            np.isnan(target)] = -666  # hack because theano does not support nan
        # Trigger a full garbage collection every 1000 batches to bound
        # memory growth in long-running loader processes.
        self.gc_count += 1
        if self.gc_count % 1000 == 0:
            gc.collect()
        return tuple(ret)
[docs] def warp_cut(self, img, target, warp, warp_params):
"""
(Wraps :py:meth:`elektronn2.data.transformations.get_warped_slice()`)
Cuts a warped slice out of the input and target arrays.
The same random warping transformation is each applied to both input
and target.
Warping is randomly applied with the probability defined by the ``warp``
parameter (see below).
Parameters
----------
img: np.ndarray
Input image
target: np.ndarray
Target image
warp: float or bool
False/True disable/enable warping completely.
If ``warp`` is a float, it is used as the ratio of inputs that
should be warped.
E.g. 0.5 means approx. every second call to this function actually
applies warping to the image-target pair.
warp_params: dict
kwargs that are passed through to
:py:meth:`elektronn2.data.transformations.get_warped_slice()`.
Can be empty.
Returns
-------
d: np.ndarray
(Warped) input image slice
t: np.ndarray
(Warped) target slice
"""
if (warp is True) or (warp == 1): # always warp
do_warp = True
elif (0 < warp < 1): # warp only a fraction of examples
do_warp = True if (self.rng.rand() < warp) else False
else: # never warp
do_warp = False
if not do_warp:
warp_params = dict(warp_params)
warp_params['warp_amount'] = 0
d, t = transformations.get_warped_slice(img, self.patch_size,
aniso_factor=self.aniso_factor,
target=target,
target_ps=self.target_ps,
target_vec_ix=self.target_vec_ix,
target_discrete_ix=self.target_discrete_ix,
rng=self.rng, **warp_params)
return d, t
def _getcube(self, source):
"""
Draw an example cube according to sampling weight on training data,
or randomly on valid data
"""
if source == 'train':
p = self.rng.rand()
i = np.flatnonzero(self._sampling_weight <= p)[-1]
d, t, ll_mask = self.train_d[i], self.train_l[i], self.train_extra[
i]
elif source == "valid":
if len(self.valid_d) == 0:
logger.info(
"Validation Set empty. Disable testing on validation set.")
raise ValueError("No validation set")
i = self.rng.randint(0, len(self.valid_d))
d = self.valid_d[i]
t = self.valid_l[i]
ll_mask = self.valid_extra[i]
else:
raise ValueError("Unknown data source")
return d, t, ll_mask
def _stridedtargets(self, lab):
if self.ndim == 3:
return lab[:, :, ::self.strides[0], ::self.strides[1],
::self.strides[2]]
elif self.ndim == 2:
return lab[:, :, ::self.strides[0], ::self.strides[1]]
[docs] def load_data(self):
"""
Parameters
----------
d_path/l_path: string
Directories to load data from
d_files/l_files: list
List of data/target files in <path> directory (must be in the same order!).
Each list element is a tuple in the form
**(<Name of h5-file>, <Key of h5-dataset>)**
cube_prios: list
(not normalised) list of sampling weights to draw examples from
the respective cubes. If None the cube sizes are taken as priorities.
valid_cubes: list
List of indices for cubes (from the file-lists) to use as validation
data and exclude from training, may be empty list to skip performance
estimation on validation data.
"""
# returns lists of cubes, ll_mask is a tuple per cube
data, target, extras = self.read_files()
if self.mode == 'img-scalar':
data = border_treatment(data, self.patch_size, self.border_mode,
self.ndim)
default_extra = (np.ones(self.t_n_f), np.zeros(self.t_n_f))
extras = [default_extra if x is None else x for x in extras]
prios = []
# Distribute Cubes into training and valid list
for k, (d, t, e) in enumerate(zip(data, target, extras)):
if k in self.valid_cubes:
self.valid_d.append(d)
self.valid_l.append(t)
self.valid_extra.append(e)
else:
self.train_d.append(d)
self.train_l.append(t)
self.train_extra.append(e)
# If no priorities are given: sample proportional to cube size
prios.append(t.size)
if self.cube_prios is None:
prios = np.array(prios, dtype=np.float)
else: # If priorities are given: sample irrespective of cube size
prios = np.array(self.cube_prios, dtype=np.float)
# sample example i if: batch_prob[i] < p
self._sampling_weight = np.hstack((0, np.cumsum(prios / prios.sum())))
self._training_count = len(self.train_d)
self._valid_count = len(self.valid_d)
[docs] def check_files(self):
"""
Check if file paths in the network config are available.
"""
notfound = False
give_neuro_data_hint = False
fullpaths = [os.path.join(self.d_path, f) for f, _ in self.d_files] + \
[os.path.join(self.l_path, f) for f, _ in self.l_files]
for p in fullpaths:
if not os.path.exists(p):
print('{} not found.'.format(p))
notfound = True
if 'neuro_data_zxy' in p:
give_neuro_data_hint = True
if give_neuro_data_hint:
print(
'\nIt looks like you are referencing the neuro_data_zxy dataset.\n'
'To install the neuro_data_xzy dataset to the default location, run:\n'
' $ wget http://elektronn.org/downloads/neuro_data_zxy.zip\n'
' $ unzip neuro_data_zxy.zip -d ~/neuro_data_zxy')
if notfound:
print('\nPlease fetch the necessary dataset and/or '
'change the relevant file paths in the network config.')
sys.stdout.flush()
sys.exit(1)
    def read_files(self):
        """
        Image files on disk are expected to be in order (ch,x,y,z) or (x,y,z)
        But image stacks are returned as (z,ch,x,y) and target as (z,x,y,)
        irrespective of the order in the file. If the image files have no
        channel this dimension is extended to a singleton dimension.
        """
        self.check_files()
        data, target, extras = [], [], []
        pbar = tqdm.tqdm(total=len(self.d_files), ncols=120, leave=False)
        for (d_f, d_key), (l_f, l_key) in zip(self.d_files, self.l_files):
            pbar.write('Loading %s and %s' % (d_f, l_f))
            if self.h5stream:
                # Keep the h5 datasets on disk instead of loading them into
                # RAM; this only works for uncompressed 4d float32 data.
                d = h5py.File(os.path.join(self.d_path, d_f), 'r')[d_key]
                t = h5py.File(os.path.join(self.l_path, l_f), 'r')[l_key]
                assert d.compression == t.compression == None
                assert len(d.shape) == len(t.shape) == 4
                assert d.dtype == np.float32
                assert t.dtype == self.t_dtype
            else:
                d = utils.h5load(os.path.join(self.d_path, d_f), d_key)
                t = utils.h5load(os.path.join(self.l_path, l_f), l_key)
            # Optional lazy-label mask stored alongside the labels; missing
            # key means "no mask" (None placeholder, filled in load_data).
            try:
                ll_mask_1 = utils.h5load(os.path.join(self.l_path, l_f),
                                         'll_mask')
                if not self.zxy:
                    ll_mask_1 = transformations.xyz2zxy(ll_mask_1)
                extras.append(ll_mask_1)
            except KeyError:
                extras.append(None)
            if not self.zxy:
                d = transformations.xyz2zxy(d)
                t = transformations.xyz2zxy(t)
            if self.mode == 'img-scalar':
                assert t.ndim == 1, "Scalar targets must be 1d"
            if len(d.shape) == 4:  # h5 dataset has no ndim
                self.n_f = d.shape[0]
            elif len(d.shape) == 3:  # We have no channels in data
                self.n_f = 1
                d = d[None]  # add (empty) 0-axis
            if len(t.shape) == 3:  # If labels not empty add first axis
                t = t[None]
            # All label cubes must agree on the number of target features.
            if self.t_n_f is None:
                self.t_n_f = t.shape[0]
            else:
                assert self.t_n_f == t.shape[0]
            self.n_labelled_pixel += t[0].size
            # determine normalisation depending on int or float type
            if d.dtype.kind in ('u', 'i'):
                m = 255.
                d = np.ascontiguousarray(d, dtype=np.float32) / m
            # Verify the target dtype can represent the largest label value
            # before casting.
            if (np.dtype(self.t_dtype) is not np.dtype(t.dtype)) and \
                    self.t_dtype not in ['float32']:
                m = t.max()
                M = np.iinfo(self.t_dtype).max
                if m > M:
                    raise ValueError("Loading of data: targets must be cast "
                                     "to %s, but %s cannot store value %g, "
                                     "maximum allowed value: %g. You may try "
                                     "to renumber targets." % (self.t_dtype,
                                                               self.t_dtype, m,
                                                               M))
            if not self.h5stream:
                d = np.ascontiguousarray(d, dtype=np.float32)
                t = np.ascontiguousarray(t, dtype=self.t_dtype)
            pbar.write('Shapes: data %s, targets %s' % (d.shape, t.shape))
            data.append(d)
            target.append(t)
            gc.collect()
            pbar.update()
        return data, target, extras
"""
3d label kinds:
  k:   number of classes
  n_c: number of categories (each of which has k classes)

normal single-category multiclass classification (e.g. bg, syn, mito, ves):
  (x,y,z)        encoded_targets==True,  n_targets==k
  (1,x,y,z)      encoded_targets==True,  n_targets==k
  (k,x,y,z)      encoded_targets==False, n_targets==k

multi-category multiclass classification (e.g. affinities):
  (n_c,  x,y,z)  encoded_targets==True,  n_targets==k
  (n_c*k,x,y,z)  encoded_targets==False, n_targets==k

single-target regression (e.g. image reconstruction of grey img):
  (x,y,z)        encoded_targets indifferent, n_targets==1
  (1,x,y,z)      encoded_targets indifferent, n_targets==1

multi-target regression (e.g. image reconstruction of rgb img):
  (k,x,y,z)      n_targets==k

--> l.shape[1] = ?
"""
##############################################################################################################
class DummySkel(object):
    """
    Minimal stand-in skeleton used as the bookkeeping target for
    'flatfield' (blank) training examples in :py:class:`AgentData`.
    """
    def __init__(self):
        self.grad = np.array([0, 0, 0], dtype=np.float32)
        self.last_slicing_params = None
        self.lost_track = True  # don't train sequentially on this
        self.debug_traces, self.debug_traces_current = [], []
        self.debug_grads, self.debug_grads_current = [], []

    def _grad(self, pred_zxy):
        """Gradient of L = 1/2(x**2 + y**2) w.r.t. the prediction: [0, x, y]."""
        return np.array([0, pred_zxy[1], pred_zxy[2]], dtype=np.float32)

    def get_loss_and_gradient(self, prediction_c, **kwargs):
        """Loss is the summed magnitude of the x,y prediction components."""
        self.debug_traces_current.append(None)
        self.debug_grads_current.append(None)
        magnitude = abs(prediction_c[1:]).sum()  # sum of x,y magnitude
        loss = np.array([magnitude], dtype=np.float32)
        return loss, self._grad(prediction_c)
class AgentData(BatchCreatorImage):
    """
    Load raw_cube, vec_prob_obj_cube and skelfiles + rel.offset
    """
    def __init__(self, input_node, side_target_node, path_prefix=None,
                 raw_files=None, skel_files=None, vec_files=None,
                 valid_skels=None, target_vec_ix=None, target_discrete_ix=None,
                 abs_offset=None, aniso_factor=2):
        assert (path_prefix and raw_files and skel_files and vec_files
                and abs_offset)
        # NOTE: ``np.int`` was removed in NumPy >= 1.24; it was an alias for
        # the builtin ``int``, so behaviour is unchanged.
        abs_offset = np.array(abs_offset, dtype=int)
        path_prefix = os.path.expanduser(path_prefix)
        self.path_prefix = path_prefix
        self.raw_files = raw_files
        self.skel_files = skel_files
        self.vec_files = vec_files
        self.valid_skels = valid_skels if valid_skels is not None else []
        self.abs_offset = abs_offset
        self.aniso_factor = aniso_factor
        self.target_vec_ix = target_vec_ix
        self.target_discrete_ix = target_discrete_ix
        # Infer geometric info from input/target shapes
        self.ndim = input_node.shape.ndim
        self.patch_size = np.array(input_node.shape.spatial_shape, dtype=int)
        self.strides = np.array(side_target_node.shape.strides, dtype=int)
        self.offsets = np.array(side_target_node.shape.offsets, dtype=int)
        self.target_ps = self.patch_size - self.offsets * 2
        self.n_f = None    # number of channels/feature in input
        # will be inferred when reading data
        self.t_n_f = None  # the shape of the returned label batch at index 1
        self.t_dtype = side_target_node.output.dtype
        # Need to infer this when reading labels and with n_target/encoded_targets
        # Setup internal stuff
        self.rng = np.random.RandomState(np.uint32((time.time() * 0.0001 -
                                                    int(time.time() * 0.0001))
                                                   * 4294967295))
        self.pid = os.getpid()
        self.gc_count = 1
        self.n_failed_warp = 0
        self.n_successful_warp = 0
        self._sampling_weight = None
        self._training_count = None
        self._valid_count = None
        # Actual data fields
        self.train_d = []
        self.train_l = []
        self.valid_s = []
        self.train_s = []
        assert len(raw_files) == len(skel_files) == len(vec_files) == 1, \
            "not supported atm, skeletons " \
            "from different cubes are concated, " \
            "only one abs offset"
        self.load_data()
        logger.info('{}\n'.format(self.__repr__()))
def __repr__(self):
s = "Data Set with {0:,d} input channel(s):\n"+\
"#train skels: {1:,d} and #valid skels: {2:,d}"
s = s.format(self.n_f, len(self.train_s),
len(self.valid_s))
return s
def _allocbatch(self, batch_size):
images, target = super(AgentData, self)._allocbatch(batch_size)
skel_i = np.zeros(batch_size, dtype='int16')
slicing_params = np.zeros([batch_size,20], dtype='float32')
return images, target, skel_i, slicing_params
[docs] def getbatch(self, batch_size=1, source='train', aniso=True,
z_shift=0, gamma=0,
grey_augment_channels=None,
r_max_scale=0.9,
tracing_dir_prior_c=0.5,
force_dense=False, flatfield_p=1e-3):
# This is especially required for multiprocessing
if grey_augment_channels is None:
grey_augment_channels = []
self._reseed()
images, target, skel_i, slicing_params = self._allocbatch(batch_size)
patch_count = 0
while patch_count < batch_size: # Loop to fill up batch with examples
if (self.rng.rand() > flatfield_p) or source!='train':
skel, i = self.getskel(source) # get skel randomly
position_s = skel.sample_tube_point(self.rng, r_max_scale=r_max_scale)
local_direc_is = skel.sample_local_direction_iso(position_s, n_neighbors=6)
tracing_direc_is = skel.sample_tracing_direction_iso(self.rng, local_direc_is, c=tracing_dir_prior_c)
position_l = position_s[::-1] # from lab to data frame (xyz) -> (zxy)
position_d = position_l - self.abs_offset
tracing_direc_il = tracing_direc_is[::-1] # from lab to data frame (xyz) -> (zxy)
try:
if config.inspection > 2: # mark position by opaque cube
self.train_d[0][:,
position_d[0] - 1:position_d[0] + 2,
position_d[1] - 2:position_d[1] + 3,
position_d[2] - 2:position_d[2] + 3] = 1.0
image, vec, M = transformations.get_tracing_slice(
self.train_d[0], self.patch_size, position_d, z_shift=z_shift,
aniso_factor=self.aniso_factor, sample_aniso=aniso,
gamma=gamma, direction_iso=tracing_direc_il,
target=self.train_l[0], target_ps=self.target_ps,
target_vec_ix=self.target_vec_ix,
target_discrete_ix=self.target_discrete_ix, rng=self.rng)
self.n_successful_warp += 1
trafo = transformations.Transform(M, position_l=position_l,
aniso_factor=self.aniso_factor)
except transformations.WarpingOOBError:
self.n_failed_warp += 1
continue
if source == "train": # no grey augmentation for testing
image = greyAugment(image, grey_augment_channels, self.rng)
else:
vec_sh = (self.train_l[0].shape[0],)+tuple(self.target_ps)
vec = np.ones(vec_sh, dtype=self.t_dtype) * np.nan
img_sh = (self.train_d[0].shape[0],)+tuple(self.patch_size)
image = np.zeros(img_sh, dtype=np.float32)
i = len(self.train_s) - 1
M = np.eye(4)
position_l = np.zeros(3, np.int)
position_d = position_l
local_direc_is = position_l
tracing_direc_il = position_l
trafo = transformations.Transform(M, position_l=position_l,
aniso_factor=self.aniso_factor)
target[patch_count] = vec
images[patch_count] = image
skel_i[patch_count] = i
slicing_params[patch_count] = trafo.to_array()
patch_count += 1
target[np.isnan(target)] = -666 # Stupid hack because theano does not support nan
if not (force_dense or np.all(self.strides==1)):
target = self._stridedtargets(target)
ret = [images, target, skel_i, slicing_params]
if config.inspection > 2:
debug_img = self.train_d[0][:,
position_d[0] - 11:position_d[0] + 12,
position_d[1] - 21:position_d[1] + 22,
position_d[2] - 21:position_d[2] + 22]
if i == len(self.train_s) - 1:
i = "DummySkel"
local_direc_il = local_direc_is[::-1]
inspection_logger.info("skeleton: %s" % i)
inspection_logger.info("pos_d: %s, pos_l: %s, ldir_il: %s, tdir_il: %s"
% (position_d.tolist(),
position_l.tolist(),
local_direc_il.tolist(),
tracing_direc_il.tolist()))
inspection_logger.info("params: %s" % (trafo.to_array().tolist()))
ret = [images, target, skel_i, slicing_params, debug_img]
self.gc_count += 1
if self.gc_count%1000==0:
gc.collect()
return tuple(ret)
    def get_newslice(self, position_l, direction_il, batch_size=1, source='train',
                     aniso=True, z_shift=0, gamma=0, grey_augment_channels=None,
                     r_max_scale=0.9, tracing_dir_prior_c=0.5, force_dense=False,
                     flatfield_p=1e-3, scale=1.0, last_ch_max_interp=False):
        """
        Cut a single tracing slice at an explicitly given position and
        direction (unlike :py:meth:`getbatch`, which samples both from
        skeletons). Only ``batch_size == 1`` and ``source == 'train'`` are
        supported; returns (images, target, trafo) plus a debug crop when
        ``config.inspection > 2``.
        """
        assert batch_size == 1
        if grey_augment_channels is None:
            grey_augment_channels = []
        if source != 'train':
            raise ValueError
        images, target, skel_i, slicing_params = self._allocbatch(batch_size)
        position_d = position_l - self.abs_offset
        if config.inspection > 2:  # mark position by opaque cube
            self.train_d[0][:,
                            position_d[0] - 2:position_d[0] + 3,
                            position_d[1] - 4:position_d[1] + 5,
                            position_d[2] - 4:position_d[2] + 5] = 1.0
        image, vec, M = transformations.get_tracing_slice(
            self.train_d[0], self.patch_size, position_d,
            z_shift=z_shift, aniso_factor=self.aniso_factor, sample_aniso=aniso,
            gamma=gamma, scale_factor=scale, direction_iso=direction_il,
            target=self.train_l[0], target_ps=self.target_ps,
            target_vec_ix=self.target_vec_ix,
            target_discrete_ix=self.target_discrete_ix, rng=self.rng,
            last_ch_max_interp=last_ch_max_interp)
        trafo = transformations.Transform(M, position_l=position_l,
                                          aniso_factor=self.aniso_factor)
        image = greyAugment(image, grey_augment_channels, self.rng)
        if vec is not None:  # allow for using this with no vec data
            target[0] = vec
        images[0] = image
        # Stupid hack because theano does not support nan
        target[np.isnan(target)] = -666
        if not (force_dense or np.all(self.strides == 1)):
            target = self._stridedtargets(target)
        ret = [images, target, trafo]
        if config.inspection > 2:
            # Debug output: crop around the position, plus a reference slice
            # at scale 1.0, both dumped to /tmp for visual comparison.
            debug_img = self.train_d[0][:,
                                        position_d[0] - 11:position_d[0] + 12,
                                        position_d[1] - 21:position_d[1] + 22,
                                        position_d[2] - 21:position_d[2] + 22]
            image_ref, _, _ = transformations.get_tracing_slice(
                self.train_d[0], self.patch_size, position_d,
                z_shift=z_shift, aniso_factor=self.aniso_factor,
                sample_aniso=aniso,
                gamma=gamma, scale_factor=1.0, direction_iso=direction_il,
                target=self.train_l[0], target_ps=self.target_ps,
                target_vec_ix=self.target_vec_ix,
                target_discrete_ix=self.target_discrete_ix, rng=self.rng)
            # NOTE(review): uses the global np.random instead of self.rng --
            # presumably fine for a debug file name, but verify.
            i = np.random.randint(0, 1014)
            utils.h5save(image, '/tmp/%s_img%i-%.2f.h5' % (user_name, i, scale))
            utils.h5save(image_ref, '/tmp/%s_img%i-ref.h5' % (user_name, i))
            ret = [images, target, trafo, debug_img]
        return tuple(ret)
[docs] def getskel(self, source):
"""
Draw an example skeleton according to sampling weight on training data,
or randomly on valid data
"""
if source=='train':
p = self.rng.rand()
i = np.flatnonzero(self._sampling_weight <= p)[-1]
skel = self.train_s[i]
elif source == "valid":
if len(self.valid_s) == 0:
logger.info("Validation Set empty. Disable testing on validation set.")
return []
i = self.rng.randint(0, len(self.valid_s))
skel = self.valid_s[i]
else:
raise ValueError("Unknown data source")
return skel, i
[docs] def load_data(self):
"""
Parameters
----------
d_path/l_path: string
Directories to load data from
d_files/l_files: list
List of data/target files in <path> directory (must be in the same order!).
Each list element is a tuple in the form
**(<Name of h5-file>, <Key of h5-dataset>)**
cube_prios: list
(not normalised) list of sampling weights to draw examples from
the respective cubes. If None the cube sizes are taken as priorities.
valid_cubes: list
List of indices for cubes (from the file-lists) to use as validation
data and exclude from training, may be empty list to skip performance
estimation on validation data.
"""
# returns lists of cubes, ll_mask is a tuple per cube
data, vec, skeletons = self.read_files()
self.train_d = data
self.train_l = vec
prios = []
# Distribute Cubes into training and valid list
for skel_list, valid_skels in zip(skeletons, self.valid_skels):
for skel_i, skel in enumerate(skel_list):
if skel_i in valid_skels:
self.valid_s.append(skel)
else:
self.train_s.append(skel)
# If no priorities are given: sample proportional to cube size
prios.append(len(skel.all_nodes))
prios = np.array(prios, dtype=np.float)
# sample example i if: batch_prob[i] < p
self._sampling_weight = np.hstack((0, np.cumsum(prios / prios.sum())))
self.train_s.append(DummySkel())
[docs] def read_files(self):
"""
Image files on disk are expected to be in order (ch,x,y,z) or (x,y,z)
But image stacks are returned as (z,ch,x,y) and target as (z,x,y,)
irrespective of the order in the file. If the image files have no
channel this dimension is extended to a singleton dimension.
"""
logger.info("Loading Data")
data, vec, skeletons = [], [], []
for (d_f, d_key), (l_f, l_key), s_f in zip(self.raw_files, self.vec_files, self.skel_files):
d = utils.h5load(os.path.join(self.path_prefix, d_f), d_key)
if d.ndim == 4:
self.n_f = d.shape[0]
elif len(d.shape) == 3: # We have no channels in data
self.n_f = 1
d = d[None] # add (empty) 0-axis
# determine normalisation depending on int or float type
if d.dtype in [np.int, np.int8, np.int16, np.int32, np.uint32,
np.uint, np.uint8, np.uint16, np.uint32, np.uint32]:
m = 255
else:
m = 1
d = np.ascontiguousarray(d, dtype=np.float32) / m
try:
t = utils.h5load(os.path.join(self.path_prefix, l_f), l_key)
if len(t.shape) == 3: # If labels not empty add first axis
t = t[None]
assert t.ndim==4
self.t_n_f = t.shape[0]
if (self.t_dtype is not t.dtype) and self.t_dtype not in ['float32']:
m = t.max()
M = np.iinfo(self.t_dtype).max
if m > M:
raise ValueError("Loading of data: targets must be cast "
"to %s, but %s cannot store value %g, "
"maximum allowed value: %g. You may try "
"to renumber targets." %(self.t_dtype,
self.t_dtype, m, M))
t = np.ascontiguousarray(t, dtype=self.t_dtype)
except:
t = None
self.t_n_f = 9
gc.collect()
data.append(d)
vec.append(t)
s = utils.pickleload(os.path.join(self.path_prefix, s_f))
skeletons.append(s)
for skel in s: # backward fixes of attributes that are not pickled
skel.lost_track = False
skel.position_s = None
skel.position_l = None
skel.direction_il = None
skel.start_new_training = True
skel.prev_batch = None
skel.trafo = None
skel.training_traces = []
skel.current_trace = None
skel.linked_data = self
skel._hull_point_bg = dict()
skel.background_processes = config.background_processes
skel.cnn_grid = None
return data, vec, skeletons
class GridData(AgentData):
    """
    :py:class:`AgentData` variant whose batches are (image, target_grid)
    pairs drawn from randomly sampled training skeletons.
    """
    def __init__(self, *args, **kwargs):
        super(GridData, self).__init__(*args, **kwargs)

    def getbatch(self, **get_batch_kwargs):
        """
        Return one (img, target_grid) training example; out-of-bounds
        warps are retried by recursing until a valid example is found.
        """
        self._reseed()
        try:
            skel_example, skel_index = self.getskel('train')
            skel_example.start_new_training = True
            batch = skel_example.getbatch(0, 0.0, **get_batch_kwargs)
            img, target_img, target_grid, target_node = batch
            target_grid = target_grid[None]  # add batch axis
            self.last_skel = skel_example  # DBG
            return img, target_grid
        except transformations.WarpingOOBError:
            return self.getbatch(**get_batch_kwargs)