Source code for deepdish.core

from __future__ import division, print_function, absolute_import
_is_verbose = False
_is_silent = False

import time
import warnings
import numpy as np
import itertools as itr
import sys
from contextlib import contextmanager
warnings.simplefilter("ignore", np.ComplexWarning)


class AbortException(Exception):
    """
    This exception is used for when the user wants to quit algorithms mid-way.
    The `AbortException` can for instance be sent by pygame input, and caught
    by whatever is running the algorithm.
    """
    pass


[docs]def bytesize(arr): """ Returns the memory byte size of a Numpy array as an integer. """ byte_size = np.prod(arr.shape) * np.dtype(arr.dtype).itemsize return byte_size
def humanize_bytesize(byte_size): order = np.log(byte_size) / np.log(1024) orders = [ (5, 'PB'), (4, 'TB'), (3, 'GB'), (2, 'MB'), (1, 'KB'), (0, 'B') ] for ex, name in orders: if order >= ex: return '{:.4g} {}'.format(byte_size / 1024**ex, name)
[docs]def memsize(arr): """ Returns the required memory of a Numpy array as a humanly readable string. """ return humanize_bytesize(bytesize(arr))
[docs]def span(arr): """ Calculate and return the mininum and maximum of an array. Parameters ---------- arr : ndarray Numpy array. Returns ------- min : dtype Minimum of array. max : dtype Maximum of array. """ # TODO: This could be made faster with a custom ufunc return (np.min(arr), np.max(arr))
[docs]def apply_once(func, arr, axes, keepdims=True): """ Similar to `numpy.apply_over_axes`, except this performs the operation over a flattened version of all the axes, meaning that the function will only be called once. This only makes a difference for non-linear functions. Parameters ---------- func : callback Function that operates well on Numpy arrays and returns a single value of compatible dtype. arr : ndarray Array to do operation over. axes : int or iterable Specifies the axes to perform the operation. Only one call will be made to `func`, with all values flattened. keepdims : bool By default, this is True, so the collapsed dimensions remain with length 1. This is simlar to `numpy.apply_over_axes` in that regard. If this is set to False, the dimensions are removed, just like when using for instance `numpy.sum` over a single axis. Note that this is safer than subsequently calling squeeze, since this option will preserve length-1 dimensions that were not operated on. Examples -------- >>> import deepdish as dd >>> import numpy as np >>> rs = np.random.RandomState(0) >>> x = rs.uniform(size=(10, 3, 3)) Image that you have ten 3x3 images and you want to calculate each image's intensity standard deviation: >>> np.apply_over_axes(np.std, x, [1, 2]).ravel() array([ 0.06056838, 0.08230712, 0.08135083, 0.09938963, 0.08533604, 0.07830725, 0.066148 , 0.07983019, 0.08134123, 0.01839635]) This is the same as ``x.std(1).std(1)``, which is not the standard deviation of all 9 pixels together. To fix this we can flatten the pixels and try again: >>> x.reshape(10, 9).std(axis=1) array([ 0.17648981, 0.32849108, 0.29409526, 0.25547501, 0.23649064, 0.26928468, 0.20081239, 0.33052397, 0.29950855, 0.26535717]) This is exactly what this function does for you: >>> dd.apply_once(np.std, x, [1, 2], keepdims=False) array([ 0.17648981, 0.32849108, 0.29409526, 0.25547501, 0.23649064, 0.26928468, 0.20081239, 0.33052397, 0.29950855, 0.26535717]) """ all_axes = np.arange(arr.ndim) if isinstance(axes, int): axes = {axes} else: axes = set(axis % arr.ndim for axis in axes) principal_axis = min(axes) for i, axis in enumerate(axes): axis0 = principal_axis + i if axis != axis0: all_axes[axis0], all_axes[axis] = all_axes[axis], all_axes[axis0] transposed_arr = arr.transpose(all_axes) new_shape = [] new_shape_keepdims = [] for axis, dim in enumerate(arr.shape): if axis == principal_axis: new_shape.append(-1) elif axis not in axes: new_shape.append(dim) if axis in axes: new_shape_keepdims.append(1) else: new_shape_keepdims.append(dim) collapsed = np.apply_along_axis(func, principal_axis, transposed_arr.reshape(new_shape)) if keepdims: return collapsed.reshape(new_shape_keepdims) else: return collapsed
[docs]def tupled_argmax(a): """ Argmax that returns an index tuple. Note that `numpy.argmax` will return a scalar index as if you had flattened the array. Parameters ---------- a : array_like Input array. Returns ------- index : tuple Tuple of index, even if `a` is one-dimensional. Note that this can immediately be used to index `a` as in ``a[index]``. Examples -------- >>> import numpy as np >>> import deepdish as dd >>> a = np.arange(6).reshape(2,3) >>> a array([[0, 1, 2], [3, 4, 5]]) >>> dd.tupled_argmax(a) (1, 2) """ return np.unravel_index(np.argmax(a), np.shape(a))
def multi_range(*args): return itr.product(*[range(a) for a in args]) @contextmanager
[docs]def timed(name=None, file=sys.stdout, callback=None, wall_clock=True): """ Context manager to make it easy to time the execution of a piece of code. This timer will never run your code several times and is meant more for simple in-production timing, instead of benchmarking. Reports the wall-clock time (using `time.time`) and not the processor time. Parameters ---------- name : str Name of the timing block, to identify it. file : file handler Which file handler to print the results to. Default is standard output. If a numpy array and size 1 is given, the time in seconds will be stored inside it. Ignored if `callback` is set. callback : callable This offer even more flexibility than `file`. The callable will be called at the end of the execution with a single floating point argument with the elapsed time in seconds. Examples -------- >>> import deepdish as dd >>> import time The `timed` function is a context manager, so everything inside the ``with`` block will be timed. The results will be printed by default to standard output: >>> with dd.timed('Sleep'): # doctest: +SKIP ... time.sleep(1) [timed] Sleep: 1.001035451889038 s Using the `callback` parameter, we can accumulate multiple runs into a list: >>> times = [] >>> for i in range(3): # doctest: +SKIP ... with dd.timed(callback=times.append): ... time.sleep(1) >>> times # doctest: +SKIP [1.0035350322723389, 1.0035550594329834, 1.0039470195770264] """ start = time.time() yield end = time.time() delta = end - start if callback is not None: callback(delta) elif isinstance(file, np.ndarray) and len(file) == 1: file[0] = delta else: name_str = ' {}'.format(name) if name is not None else '' print(("[timed]{0}: {1} s".format(name_str, delta)), file=file)
class SliceClass(object): def __getitem__(self, index): return index aslice = SliceClass()