Source code for crumpets.augmentation

import random
from math import ceil
import warnings

import numpy as np
import cv2
from simplejpeg import is_jpeg
from simplejpeg import decode_jpeg

from .rng import INTERP_LINEAR
from .rng import INTERP_AERA
from .rng import MAX_SUPERSAMPLING
from . import augmentation_cpu as cpuaugs


try:
    # noinspection PyUnresolvedReferences
    IMREAD_COLOR = cv2.IMREAD_COLOR
    # noinspection PyUnresolvedReferences
    IMREAD_GRAYSCALE = cv2.IMREAD_GRAYSCALE
except AttributeError:
    # noinspection PyUnresolvedReferences
    IMREAD_COLOR = cv2.CV_LOAD_IMAGE_COLOR
    # noinspection PyUnresolvedReferences
    IMREAD_GRAYSCALE = cv2.CV_LOAD_IMAGE_GRAYSCALE


[docs]def decode_opencv(data, color):
    a = np.frombuffer(data, dtype=np.uint8)
    im = cv2.imdecode(a, IMREAD_COLOR if color else -1)
    if im is None:
        raise ValueError('OpenCV could not decode image')
    if color:
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    if len(im.shape) < 3:
        im = im[:, :, np.newaxis]
    return im


# don't use OpenCL
# prevents spawning GPU processes that hog memory
try:
    cv2.ocl.setUseOpenCL(False)
except AttributeError:
    pass
# run single-threaded; only warpAffine benefits and scales poorly
cv2.setNumThreads(1)


[docs]def decode_image(data, color, min_height=0, min_width=0, min_factor=2):
    if data is None:
        return None
    try:
        # fast path for JPEG
        if is_jpeg(data):
            return decode_jpeg(data, 'rgb' if color else 'gray',
                               True, True, min_height, min_width, min_factor)
        # scrub-tier images
        else:
            return decode_opencv(data, color)
    except (ValueError, RuntimeError) as e:
        warnings.warn(RuntimeWarning('simplejpeg failed to decode image, '
                                     + 'falling back to OpenCV: '
                                     + repr(e)))
        return decode_opencv(data, color)


[docs]def calc_scale_ratio(source_size, target_size, scale, scale_mode):
    sh, sw = source_size
    sh /= 2
    sw /= 2
    th, tw = target_size
    th /= 2
    tw /= 2
    if scale_mode == 'longest':
        r = max(sh / th, sw / tw) / scale
    elif scale_mode == 'shortest':
        r = min(sh / th, sw / tw) / scale
    else:
        raise ValueError('unknown scale mode %r' % scale_mode)
    return r, sh, sw, th, tw


[docs]def make_transform(
        source_size,
        target_size,
        angle=0,
        scale=1,
        aspect=1,
        shift=None,
        hmirror=False,
        vmirror=False,
        shear=None,
        scale_mode='shortest',
        __identity__=np.eye(3)
):
    r, sh, sw, th, tw = calc_scale_ratio(source_size, target_size,
                                         scale, scale_mode)
    # first shift target to origin
    p = __identity__.copy()
    p[(0, 1), 2] = -tw, -th
    # shear
    if shear:
        q = __identity__.copy()
        q[(1, 0), (0, 1)] = shear
        p = np.dot(q, p)
    # resize and mirror
    q = __identity__.copy()
    q[0, 0] = r*aspect * (-1 if hmirror else 1)
    q[1, 1] = r/aspect * (-1 if vmirror else 1)
    p = np.dot(q, p)
    # then rotate
    if angle:
        q = __identity__.copy()
        q[:2] = cv2.getRotationMatrix2D((0, 0), -angle, 1)
        p = np.dot(q, p)
    # finally shift to desired position in source image
    wdelta = max(sw - tw*r, tw*r - sw)
    hdelta = max(sh - th*r, th*r - sh)
    shift = shift or (0, 0)
    q = __identity__.copy()
    q[(0, 1), 2] = sw + wdelta * shift[1], sh + hdelta * shift[0]
    p = np.dot(q, p)
    return p[:2]


[docs]def rotate_and_resize(
        im,
        angle,
        target_size,
        scale,
        aspect,
        shift,
        method,
        background,
        hmirror,
        vmirror,
        shear=None,
        scale_mode='shortest',
        supersampling=0
):
    if background is None:
        background = 0
    if shear is None:
        shear = (0, 0)
    if supersampling == 0:
        r = calc_scale_ratio(im.shape[:2], target_size, scale, scale_mode)[0]
        # noinspection PyTypeChecker
        supersampling = max(1, min(MAX_SUPERSAMPLING, ceil(r)))
    if supersampling > 1:
        th, tw = target_size
        sampling_size = int(round(th * supersampling)), int(round(th * supersampling))
    else:
        sampling_size = target_size
    p = make_transform(
        im.shape[:2], sampling_size,
        angle, scale, aspect, shift, hmirror, vmirror,
        shear, scale_mode,
    )
    im = cv2.warpAffine(
        im, p, sampling_size[::-1],
        borderValue=background,
        flags=method + cv2.WARP_INVERSE_MAP,
    )
    if supersampling > 1:
        im = cv2.resize(im, target_size[::-1], interpolation=INTERP_AERA)
    # OpenCV swallows third dimension if single channel
    if len(im.shape) != 3:
        im = im[:, :, np.newaxis]
    return im


[docs]def randomize_image(
        im,
        size,
        background=None,
        color=True,
        angle=0,
        scale=1,
        shift=None,
        aspect=1,
        hmirror=False,
        vmirror=False,
        interp_method=INTERP_LINEAR,
        gamma_gray=None,
        gamma_color=None,
        contrast=None,
        noise=None,
        blur=None,
        shear=None,
        is_rgb=True,
        scale_mode='shortest',
        supersampling=0,
        gpu_augmentation=False,
        do_rotate_and_resize=True
):
    """
    Randomizes image according to given parameters.

    :param im:
        image to be transformed.
    :param size:
        target size of resulting image.
    :param background:
        background color that fills areas in the output
        where there is no pixel data;
        can be number or tuple with same number of elements as channel
    :param color:
        Boolean that flags if image is black-white or colored.
    :param angle:
        degrees of rotation
    :param scale:
        Scales the image with respect to its target size.
        `scale=1.0` scales the image to fit perfectly
        within the target size.
        Based on `scale_mode` either the shorter or longer
        edge is used as reference.
        `scale=2.0` doubles the length of the sides,
        `scale=0.5` halves it.
    :param shift:
        tuple of int (x,y) defining a shift of the picture, may create undefined space, if
        source image is moved out of target image, filled up with background color.
    :param aspect:
        float of aspect ratio change
    :param hmirror:
        boolean flag for horizontal mirror
    :param vmirror:
        boolean flag for vertical mirror
    :param interp_method:
        some interpolation method. At the moment one of:
        INTERP_NEAREST
        INTERP_LINEAR
        INTERP_CUBIC
        INTERP_LANCZOS4
        INTERP_AERA
    :param gamma_gray:
        float defining a black-white gamma
    :param gamma_color:
        tuple of floats defining a rgb gamma
    :param contrast:
        float between -1 and 1 defining a contrast change
    :param noise:
        float defining a noise strength
    :param blur:
        float defining a blur intensity, i.e. the standard deviation of a gaussian filter relative to image width
    :param shear:
        float defining shear intensity, i.e. the gradient of the horizontal edges.
        A shear of 0.0 therefore creates a rectangular image.
    :param is_rgb:
        boolean that flags if rgb color encoding is used
    :param scale_mode:
        Either `'shortest'` or `'longest'`.
        Scale the image using either shortest or longest edge
        as reference.
        `'shortest'` crops part of the image if the aspect ratio
        of image and target size do not match.
        `'longest'` ensures that the whole image can be
        fit into target size.
        A scale > 1.0 makes it bigger than target image, thus parts of it get cut out.
        A scale < 1.0 makes it smaller than target image, thus parts of the target image are undefined and
        filled up with background.
    :param supersampling:
        supersampling factor, 1 turns off supersampling, 2 means 4 samples per pixel,
        3 means 9 samples and so on;
        default of 0 means choose best based on true image size, output size and scale factor
    :param gpu_augmentation:
        boolean that flags if gpu augmentations is used elsewhere and thus disables cpu augmentations in
        this method for all augmentations where gpu versions are available.
    :param do_rotate_and_resize:
        boolean that flags if rotation and resize operations are used. Mostly used for test cases.
        Should usually not be changed.
    :return:
        randomized image
    """
    # decode image, resize sub
    if do_rotate_and_resize:
        im = rotate_and_resize(
            im, angle, size, scale, aspect, shift, interp_method,
            background, hmirror, vmirror, shear,
            scale_mode, supersampling,
        )
    if not gpu_augmentation:
        # randomize the order of operations
        order = []
        if gamma_gray or gamma_color or contrast:
            order.append(0)
        if noise:
            order.append(1)
        if blur is not None and blur > 0.1/448:
            order.append(2)
        random.shuffle(order)
        for op in order:
            if op == 0:
                im = cpuaugs.add_gamma(im, color, gamma_gray, gamma_color, contrast)
            if op == 1:
                if is_rgb:
                    im = cpuaugs.add_noise_rgb(im, noise)
                else:
                    im = cpuaugs.add_noise_other(im, noise)
            if op == 2:
                im = cpuaugs.add_blur(im, blur)
    return im