Spaces:
Runtime error
Runtime error
| # coding=utf-8 | |
| # Copyright 2021 The Deeplab2 Authors. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| """Utility functions related to preprocessing inputs.""" | |
| import numpy as np | |
| import tensorflow as tf | |
def flip_dim(tensor_list, prob=0.5, dim=1):
  """Randomly flips a dimension of the given tensors, all together.

  A single random draw decides the flip for every tensor in the list, so
  either all of the inputs are flipped or none of them are. This is why
  tf.random_flip_left_right / tf.random_flip_up_down are not used here: they
  would draw independently per image and do not expose the probability.

  Args:
    tensor_list: A list of `Tensors` with the same number of dimensions.
    prob: The probability of a left-right flip.
    dim: The dimension to flip, 0, 1, ..

  Returns:
    outputs: A list of the possibly flipped `Tensors` as well as an indicator
      `Tensor` at the end whose value is `True` if the inputs were flipped and
      `False` otherwise.

  Raises:
    ValueError: If dim is negative or greater than the dimension of a `Tensor`.
  """
  coin = tf.random.uniform([])

  def _reverse_all():
    reversed_tensors = []
    for tensor in tensor_list:
      # Validate `dim` against each tensor's static rank before reversing.
      if dim < 0 or dim >= len(tensor.get_shape().as_list()):
        raise ValueError('dim must represent a valid dimension.')
      reversed_tensors.append(tf.reverse(tensor, [dim]))
    return reversed_tensors

  is_flipped = tf.less_equal(coin, prob)
  outputs = tf.cond(is_flipped, _reverse_all, lambda: tensor_list)
  if not isinstance(outputs, (list, tuple)):
    outputs = [outputs]
  # Append the flip indicator so callers can mirror dependent data (e.g.
  # keypoint coordinates) consistently.
  outputs.append(is_flipped)
  return outputs
def get_label_resize_method(label):
  """Returns the resize method of labels depending on label dtype.

  Args:
    label: Groundtruth label tensor.

  Returns:
    tf.image.ResizeMethod.BILINEAR, if label dtype is floating.
    tf.image.ResizeMethod.NEAREST_NEIGHBOR, if label dtype is integer.

  Raises:
    ValueError: If label is neither floating nor integer.
  """
  # Float labels (e.g. soft targets / heatmaps) interpolate smoothly; integer
  # labels (class ids) must not be blended, hence nearest neighbor.
  dtype = label.dtype
  if dtype.is_floating:
    return tf.image.ResizeMethod.BILINEAR
  if dtype.is_integer:
    return tf.image.ResizeMethod.NEAREST_NEIGHBOR
  raise ValueError('Label type must be either floating or integer.')
def _crop(image, offset_height, offset_width, crop_height, crop_width):
  """Crops the given image using the provided offsets and sizes.

  Note that the method doesn't assume we know the input image size but it does
  assume we know the input image rank.

  Args:
    image: an image of shape [height, width, channels].
    offset_height: a scalar tensor indicating the height offset.
    offset_width: a scalar tensor indicating the width offset.
    crop_height: the height of the cropped image.
    crop_width: the width of the cropped image.

  Returns:
    The cropped (and resized) image.

  Raises:
    ValueError: if `image` doesn't have rank of 3.
    InvalidArgumentError: if the rank is not 3 or if the image dimensions are
      less than the crop size.
  """
  original_shape = tf.shape(image)

  # Static rank check at graph-construction time.
  if len(image.get_shape().as_list()) != 3:
    raise ValueError('input must have rank of 3')
  original_channels = image.get_shape().as_list()[2]

  # Dynamic rank check at run time; the control dependency below guarantees
  # the assertion executes before the crop is computed.
  rank_assertion = tf.Assert(
      tf.equal(tf.rank(image), 3),
      ['Rank of image must be equal to 3.'])
  with tf.control_dependencies([rank_assertion]):
    cropped_shape = tf.stack([crop_height, crop_width, original_shape[2]])

  size_assertion = tf.Assert(
      tf.logical_and(
          tf.greater_equal(original_shape[0], crop_height),
          tf.greater_equal(original_shape[1], crop_width)),
      ['Crop size greater than the image size.'])

  offsets = tf.cast(tf.stack([offset_height, offset_width, 0]), tf.int32)

  # Use tf.slice instead of crop_to_bounding box as it accepts tensors to
  # define the crop size.
  with tf.control_dependencies([size_assertion]):
    image = tf.slice(image, offsets, cropped_shape)
  image = tf.reshape(image, cropped_shape)
  # Restore static shape info for downstream ops when crop sizes are python
  # ints (tf.slice with tensor sizes loses it).
  image.set_shape([crop_height, crop_width, original_channels])
  return image
def random_crop(image_list, crop_height, crop_width):
  """Crops the given list of images.

  The function applies the same crop to each image in the list. This can be
  effectively applied when there are multiple image inputs of the same
  dimension such as:

    image, depths, normals = random_crop([image, depths, normals], 120, 150)

  Args:
    image_list: a list of image tensors of the same dimension but possibly
      varying channel.
    crop_height: the new height.
    crop_width: the new width.

  Returns:
    the image_list with cropped images.

  Raises:
    ValueError: if there are multiple image inputs provided with different size
      or the images are smaller than the crop dimensions.
  """
  if not image_list:
    raise ValueError('Empty image_list.')

  # Compute the rank assertions.
  rank_assertions = []
  for i in range(len(image_list)):
    image_rank = tf.rank(image_list[i])
    rank_assert = tf.Assert(
        tf.equal(image_rank, 3), [
            'Wrong rank for tensor %d in image_list [expected] [actual]', i, 3,
            image_rank
        ])
    rank_assertions.append(rank_assert)

  # Read the first image's dynamic shape under its rank assertion; every other
  # image is later required to match this height/width.
  with tf.control_dependencies([rank_assertions[0]]):
    image_shape = tf.shape(image_list[0])
  image_height = image_shape[0]
  image_width = image_shape[1]
  crop_size_assert = tf.Assert(
      tf.logical_and(
          tf.greater_equal(image_height, crop_height),
          tf.greater_equal(image_width, crop_width)),
      ['Crop size greater than the image size.'])

  asserts = [rank_assertions[0], crop_size_assert]

  # Verify that all remaining images share the first image's spatial size.
  for i in range(1, len(image_list)):
    image = image_list[i]
    asserts.append(rank_assertions[i])
    with tf.control_dependencies([rank_assertions[i]]):
      shape = tf.shape(image)
    height = shape[0]
    width = shape[1]

    height_assert = tf.Assert(
        tf.equal(height, image_height), [
            'Wrong height for tensor %d in image_list [expected][actual]', i,
            height, image_height
        ])
    width_assert = tf.Assert(
        tf.equal(width, image_width), [
            'Wrong width for tensor %d in image_list [expected][actual]', i,
            width, image_width
        ])
    asserts.extend([height_assert, width_assert])

  # Create a random bounding box.
  #
  # Use tf.random.uniform and not numpy.random.rand as doing the former would
  # generate random numbers at graph eval time, unlike the latter which
  # generates random numbers at graph definition time.
  with tf.control_dependencies(asserts):
    # maxval is exclusive, hence the +1 so offset can reach the last valid
    # position (image size minus crop size).
    max_offset_height = tf.reshape(image_height - crop_height + 1, [])
    max_offset_width = tf.reshape(image_width - crop_width + 1, [])
  offset_height = tf.random.uniform([],
                                    maxval=max_offset_height,
                                    dtype=tf.int32)
  offset_width = tf.random.uniform([], maxval=max_offset_width, dtype=tf.int32)

  # The same offsets are applied to every image, yielding aligned crops.
  return [_crop(image, offset_height, offset_width,
                crop_height, crop_width) for image in image_list]
def get_random_scale(min_scale_factor, max_scale_factor, step_size):
  """Gets a random scale value.

  Args:
    min_scale_factor: Minimum scale value.
    max_scale_factor: Maximum scale value.
    step_size: The step size from minimum to maximum value.

  Returns:
    A scalar tensor with random scale value selected between minimum and
    maximum value. If `min_scale_factor` and `max_scale_factor` are the same,
    a number is returned instead.

  Raises:
    ValueError: min_scale_factor has unexpected value.
  """
  if min_scale_factor < 0 or min_scale_factor > max_scale_factor:
    raise ValueError('Unexpected value of min_scale_factor.')

  if min_scale_factor == max_scale_factor:
    return np.float32(min_scale_factor)

  # When step_size = 0, we sample the value uniformly from [min, max).
  if step_size == 0:
    # Use shape [] so this branch returns a scalar, consistent with the
    # discrete branch below (previously this returned a shape-[1] tensor,
    # which only matched via broadcasting).
    return tf.random.uniform([],
                             minval=min_scale_factor,
                             maxval=max_scale_factor)

  # When step_size != 0, we randomly select one discrete value from [min, max].
  num_steps = int((max_scale_factor - min_scale_factor) / step_size + 1)
  scale_factors = tf.linspace(min_scale_factor, max_scale_factor, num_steps)
  shuffled_scale_factors = tf.random.shuffle(scale_factors)
  return shuffled_scale_factors[0]
def randomly_scale_image_and_label(image, label=None, scale=1.0):
  """Randomly scales image and label.

  Args:
    image: Image with shape [height, width, 3].
    label: Label with shape [height, width, 1].
    scale: The value to scale image and label.

  Returns:
    Scaled image and label.
  """
  # Identity scale: return the inputs untouched.
  if scale == 1.0:
    return image, label

  input_shape = tf.shape(image)
  scaled_size = tf.cast(
      tf.cast([input_shape[0], input_shape[1]], tf.float32) * scale,
      tf.int32)

  # The bilinear resize op works on batched 4D tensors, so temporarily add
  # and then remove a leading batch dimension.
  image = tf.squeeze(
      tf.compat.v1.image.resize_bilinear(
          tf.expand_dims(image, 0), scaled_size, align_corners=True), [0])

  if label is not None:
    # Labels pick their interpolation from their dtype (nearest for ids).
    label = tf.compat.v1.image.resize(
        label,
        scaled_size,
        method=get_label_resize_method(label),
        align_corners=True)

  return image, label
def resolve_shape(tensor, rank=None):
  """Fully resolves the shape of a Tensor.

  Use as much as possible the shape components already known during graph
  creation and resolve the remaining ones during runtime.

  Args:
    tensor: Input tensor whose shape we query.
    rank: The rank of the tensor, provided that we know it.

  Returns:
    shape: The full shape of the tensor.
  """
  if rank is None:
    static_shape = tensor.get_shape().as_list()
  else:
    static_shape = tensor.get_shape().with_rank(rank).as_list()

  # Fast path: everything was known statically.
  if None not in static_shape:
    return static_shape

  # Fill each unknown dimension from the runtime shape tensor.
  dynamic_shape = tf.shape(tensor)
  return [
      dynamic_shape[axis] if size is None else size
      for axis, size in enumerate(static_shape)
  ]
def _scale_dim(original_size, factor):
  """Scales one input dimension by `factor`, flooring to an int32 tensor."""
  # Cast both operands so the multiply happens in float32 regardless of the
  # incoming (python int / tensor) types.
  scaled = tf.cast(original_size, tf.float32) * tf.cast(factor, tf.float32)
  return tf.cast(tf.floor(scaled), tf.int32)
def process_resize_value(resize_spec):
  """Helper method to process input resize spec.

  Args:
    resize_spec: Either None, a python scalar, or a sequence with length <=2.
      Each value in the sequence should be a python integer.

  Returns:
    None if input size is not valid, or 2-tuple of (height, width), derived
    from input resize_spec.

  Raises:
    ValueError: If resize_spec is a sequence with more than two elements.
  """
  if not resize_spec:
    return None

  if isinstance(resize_spec, int):
    # For conveniences and also backward compatibility.
    resize_spec = (resize_spec,)
  resize_spec = tuple(resize_spec)

  if len(resize_spec) == 1:
    resize_spec = (resize_spec[0], resize_spec[0])
  if len(resize_spec) != 2:
    # Wrap in a 1-tuple: `%` with a bare tuple expands it as the argument
    # list, which raised TypeError here instead of the intended ValueError.
    raise ValueError('Unable to process input resize_spec: %s' % (resize_spec,))

  # Non-positive sizes mean "no resize requested".
  if resize_spec[0] <= 0 or resize_spec[1] <= 0:
    return None
  return resize_spec
def _resize_to_match_min_size(input_shape, min_size):
  """Returns the resized shape so that both sides match minimum size.

  Note: the input image will still be scaled if input height and width
  are already greater than minimum size.

  Args:
    input_shape: A 2-tuple, (height, width) of the input image. Each value can
      be either a python integer or a integer scalar tensor.
    min_size: A tuple of (minimum height, minimum width) to specify the
      minimum shape after resize. The input shape would be scaled so that both
      height and width will be greater than or equal to their minimum value.

  Returns:
    A 2-tuple, (height, width), resized input shape which preserves input
    aspect ratio.
  """
  height, width = input_shape
  min_height, min_width = min_size
  # Use the larger of the two ratios so that BOTH sides end up at or above
  # their minimum while the aspect ratio is preserved.
  factor = tf.maximum(min_height / height, min_width / width)
  return _scale_dim(height, factor), _scale_dim(width, factor)
def _resize_to_fit_max_size(input_shape, max_size):
  """Returns the resized shape so that both sides fit within max size.

  Note: if input shape is already smaller or equal to maximum size, no resize
  operation would be performed.

  Args:
    input_shape: A 2-tuple, (height, width) of the input image. Each value can
      be either a python integer or a integer scalar tensor.
    max_size: A tuple of (minimum height, minimum width) to specify
      the maximum allowed shape after resize.

  Returns:
    A 2-tuple, (height, width), resized input shape which preserves input
    aspect ratio.
  """
  height, width = input_shape
  max_height, max_width = max_size
  # Use the smaller of the two ratios so that BOTH sides end up at or below
  # their maximum while the aspect ratio is preserved.
  factor = tf.minimum(max_height / height, max_width / width)
  # Never upscale: cap the factor at 1.0.
  factor = tf.minimum(tf.cast(factor, tf.float32), tf.cast(1.0, tf.float32))
  return _scale_dim(height, factor), _scale_dim(width, factor)
def resize_to_range_helper(input_shape, min_size, max_size=None, factor=None):
  """Determines output size in specified range.

  The output size (height and/or width) can be described by two cases:
  1. If current side can be rescaled so its minimum size is equal to min_size
     without the other side exceeding its max_size, then do so.
  2. Otherwise, resize so at least one side is reaching its max_size.

  An integer in `range(factor)` is added to the computed sides so that the
  final dimensions are multiples of `factor` plus one.

  Args:
    input_shape: A 2-tuple, (height, width) of the input image. Each value can
      be either a python integer or a integer scalar tensor.
    min_size: A 2-tuple of (height, width), desired minimum value after resize.
      If a single element is given, then height and width share the same
      min_size. None, empty or having 0 indicates no minimum value will be used.
    max_size: A 2-tuple of (height, width), maximum allowed value after resize.
      If a single element is given, then height and width share the same
      max_size. None, empty or having 0 indicates no maximum value will be used.
      Note that the output dimension is no larger than max_size and may be
      slightly smaller than max_size when factor is not None.
    factor: None or integer, make output size multiple of factor plus one.

  Returns:
    A 1-D tensor containing the [new_height, new_width].
  """
  new_shape = input_shape

  min_size = process_resize_value(min_size)
  if min_size:
    new_shape = _resize_to_match_min_size(input_shape, min_size)

  max_size = process_resize_value(max_size)
  if max_size:
    if factor:
      # Shrink max_size to the nearest (multiple of factor) + 1 at or below
      # it, so that the factor-rounding below cannot push the result past the
      # caller's limit.
      max_size = tuple(dim - (dim - 1) % factor for dim in max_size)
    new_shape = _resize_to_fit_max_size(new_shape, max_size)

  new_shape = tf.stack(new_shape)
  # Round each side up to the next (multiple of factor) + 1.
  if factor:
    new_shape += (factor - (new_shape - 1) % factor) % factor

  return new_shape
def resize_to_range(image,
                    label=None,
                    min_size=None,
                    max_size=None,
                    factor=None,
                    align_corners=True,
                    method=tf.image.ResizeMethod.BILINEAR):
  """Resizes image or label so their sides are within the provided range.

  The output size (height and/or width) can be described by two cases:
  1. If current side can be rescaled so its minimum size is equal to min_size
     without the other side exceeding its max_size, then do so.
  2. Otherwise, resize so at least one side is reaching its max_size.

  An integer in `range(factor)` is added to the computed sides so that the
  final dimensions are multiples of `factor` plus one.

  Args:
    image: A 3D tensor of shape [height, width, channels].
    label: (optional) A 3D tensor of shape [height, width, channels].
    min_size: A 2-tuple of (height, width), desired minimum value after resize.
      If a single element is given, then height and width share the same
      min_size. None, empty or having 0 indicates no minimum value will be used.
    max_size: A 2-tuple of (height, width), maximum allowed value after resize.
      If a single element is given, then height and width share the same
      max_size. None, empty or having 0 indicates no maximum value will be used.
      Note that the output dimension is no larger than max_size and may be
      slightly smaller than max_size when factor is not None.
    factor: Make output size multiple of factor plus one.
    align_corners: If True, exactly align all 4 corners of input and output.
    method: Image resize method. Defaults to tf.image.ResizeMethod.BILINEAR.

  Returns:
    resized_image: A 3-D tensor of shape [new_height, new_width, channels],
      where the image has been resized with the specified method.
    resized_label: Either None (if input label is None) or a 3-D tensor,
      where the input label has been resized accordingly.

  Raises:
    ValueError: If the image is not a 3D tensor.
  """
  input_height, input_width, _ = resolve_shape(image, rank=3)

  target_size = resize_to_range_helper(
      input_shape=(input_height, input_width),
      min_size=min_size,
      max_size=max_size,
      factor=factor)

  resized_image = tf.compat.v1.image.resize(
      image, target_size, method=method, align_corners=align_corners)

  if label is None:
    return resized_image, None

  # Labels choose their interpolation from their dtype (nearest for ids).
  resized_label = tf.compat.v1.image.resize(
      label,
      target_size,
      method=get_label_resize_method(label),
      align_corners=align_corners)
  return resized_image, resized_label