# SAIL-Recon / eval / utils / cropping.py
# NOTE(review): the following page-scrape artifacts were commented out so the
# module parses as Python — author "hengli", commit b7f83b0 ("first").
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import os
# Copyright (C) 2024-present Naver Corporation. All rights reserved.
# Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
#
# --------------------------------------------------------
# cropping utilities
# --------------------------------------------------------
import PIL.Image
os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"
import cv2 # noqa
import numpy as np # noqa
from easyvolcap.reloc_eval.utils.device import to_numpy
from easyvolcap.reloc_eval.utils.geometry import ( # noqa
colmap_to_opencv_intrinsics,
geotrf,
inv,
opencv_to_colmap_intrinsics,
)
try:
lanczos = PIL.Image.Resampling.LANCZOS
bicubic = PIL.Image.Resampling.BICUBIC
except AttributeError:
lanczos = PIL.Image.LANCZOS
bicubic = PIL.Image.BICUBIC
class ImageList:
    """Convenience class to apply the same operation to a whole set of images.

    Wraps one image or a collection of images; non-PIL inputs (e.g. numpy
    arrays) are converted with ``PIL.Image.fromarray``.
    """

    def __init__(self, images):
        # Accept a single image or any tuple/list/set of images.
        if not isinstance(images, (tuple, list, set)):
            images = [images]
        self.images = []
        for image in images:
            if not isinstance(image, PIL.Image.Image):
                image = PIL.Image.fromarray(image)
            self.images.append(image)

    def __len__(self):
        return len(self.images)

    def to_pil(self):
        """Return the single PIL image, or a tuple when holding several."""
        return tuple(self.images) if len(self.images) > 1 else self.images[0]

    @property
    def size(self):
        """Common (width, height) of all wrapped images; asserts they agree."""
        sizes = [im.size for im in self.images]
        assert all(sizes[0] == s for s in sizes)
        return sizes[0]

    def resize(self, *args, **kwargs):
        """Resize every image identically; returns a new ImageList."""
        return ImageList(self._dispatch("resize", *args, **kwargs))

    def crop(self, *args, **kwargs):
        """Crop every image identically; returns a new ImageList."""
        return ImageList(self._dispatch("crop", *args, **kwargs))

    def _dispatch(self, func, *args, **kwargs):
        # Apply the named PIL.Image method to every wrapped image.
        return [getattr(im, func)(*args, **kwargs) for im in self.images]
def rescale_image_depthmap(
    image, depthmap, camera_intrinsics, output_resolution, force=True
):
    """Jointly rescale a (image, depthmap)
    so that (out_width, out_height) >= output_res

    Args:
        image: PIL image (or anything ``ImageList`` accepts).
        depthmap: (H, W) array aligned with the image, or None
            (can also be used with masks instead of depthmaps).
        camera_intrinsics: 3x3 intrinsics matrix (OpenCV convention).
        output_resolution: target (width, height) lower bound.
        force: if False, return unchanged when the image is already
            smaller than the requested resolution.

    Returns:
        (image, depthmap, camera_intrinsics) rescaled consistently.
    """
    image = ImageList(image)
    input_resolution = np.array(image.size)  # (W,H)
    output_resolution = np.array(output_resolution)
    if depthmap is not None:
        # can also use this with masks instead of depthmaps
        assert tuple(depthmap.shape[:2]) == image.size[::-1]
    # define output resolution
    assert output_resolution.shape == (2,)
    scale_final = max(output_resolution / image.size) + 1e-8
    if scale_final >= 1 and not force:  # image is already smaller than what is asked
        return (image.to_pil(), depthmap, camera_intrinsics)
    output_resolution = np.floor(input_resolution * scale_final).astype(int)

    # first rescale the image so that it contains the crop
    # (lanczos when shrinking, bicubic when enlarging)
    image = image.resize(
        tuple(output_resolution), resample=lanczos if scale_final < 1 else bicubic
    )
    if depthmap is not None:
        # BUGFIX: cv2.resize requires dsize as a plain tuple of ints — passing
        # a numpy array raises a parse error in common OpenCV versions.
        # NEAREST keeps depth values unblended across depth discontinuities.
        depthmap = cv2.resize(
            depthmap,
            tuple(output_resolution),
            fx=scale_final,
            fy=scale_final,
            interpolation=cv2.INTER_NEAREST,
        )

    # no offset here; simple rescaling
    camera_intrinsics = camera_matrix_of_crop(
        camera_intrinsics, input_resolution, output_resolution, scaling=scale_final
    )
    return image.to_pil(), depthmap, camera_intrinsics
def camera_matrix_of_crop(
    input_camera_matrix,
    input_resolution,
    output_resolution,
    scaling=1,
    offset_factor=0.5,
    offset=None,
):
    """Derive the intrinsics of a view after scaling and cropping.

    The crop origin defaults to ``offset_factor`` (0.5 = centered) of the
    margins left over after scaling ``input_resolution`` down to
    ``output_resolution``; pass ``offset`` explicitly to override.
    """
    # Margins to offset the origin (must be non-negative: crop fits inside)
    margins = np.asarray(input_resolution) * scaling - output_resolution
    assert np.all(margins >= 0.0)
    if offset is None:
        offset = offset_factor * margins

    # Work in COLMAP convention, scale then shift the principal point,
    # and convert back to OpenCV convention.
    K_colmap = opencv_to_colmap_intrinsics(input_camera_matrix)
    K_colmap[:2, :] *= scaling
    K_colmap[:2, 2] -= offset
    return colmap_to_opencv_intrinsics(K_colmap)
def crop_image_depthmap(image, depthmap, camera_intrinsics, crop_bbox):
    """
    Return a crop of the input view.

    ``crop_bbox`` is (left, top, right, bottom) in pixels; the depthmap (if
    any) is sliced identically and the principal point is shifted to match.
    """
    imgs = ImageList(image)
    left, top, right, bottom = crop_bbox

    imgs = imgs.crop((left, top, right, bottom))
    if depthmap is not None:
        depthmap = depthmap[top:bottom, left:right]

    # Shift the principal point by the crop origin (copy: don't mutate input).
    new_intrinsics = camera_intrinsics.copy()
    new_intrinsics[0, 2] -= left
    new_intrinsics[1, 2] -= top

    return imgs.to_pil(), depthmap, new_intrinsics
def bbox_from_intrinsics_in_out(
    input_camera_matrix, output_camera_matrix, output_resolution
):
    """Recover the crop bbox (l, t, r, b) relating two intrinsics matrices.

    The offset is the (rounded) difference of principal points; the extent
    is given by ``output_resolution`` = (width, height).
    """
    width, height = output_resolution
    shift = input_camera_matrix[:2, 2] - output_camera_matrix[:2, 2]
    left, top = np.int32(np.round(shift))
    return (left, top, left + width, top + height)
def reciprocal_1d(corres_1_to_2, corres_2_to_1, ret_recip=False):
    """Keep only mutually-consistent 1-D correspondences.

    Index ``i`` survives when mapping it to image 2 and back returns ``i``.
    Returns (pos1, pos2), optionally prefixed by the reciprocity mask.
    """
    identity = np.arange(len(corres_1_to_2))
    is_reciprocal1 = corres_2_to_1[corres_1_to_2] == identity
    pos1 = np.flatnonzero(is_reciprocal1)
    pos2 = corres_1_to_2[pos1]
    if ret_recip:
        return is_reciprocal1, pos1, pos2
    return pos1, pos2
def generate_non_self_pairs(n):
    """Return all ordered index pairs (i, j) with i != j, as an (n*(n-1), 2) array.

    Pairs are emitted in row-major order: (0,1), (0,2), ..., (n-1, n-2).
    """
    rows, cols = np.meshgrid(np.arange(n), np.arange(n), indexing="ij")
    all_pairs = np.stack([rows.ravel(), cols.ravel()], axis=1)
    off_diagonal = all_pairs[:, 0] != all_pairs[:, 1]
    return all_pairs[off_diagonal]
def unravel_xy(pos, shape):
    """Convert flat indices (x + W*y) back to (x, y) pixel coordinates.

    Args:
        pos: 1-D integer array of flat indices into an image of ``shape``.
        shape: (H, W) of the image.

    Returns:
        (N, 2) integer array with columns in (x, y) order.
    """
    # BUGFIX: the previous implementation read ``np.unravel_index(...)[0].base``,
    # an undocumented numpy internal (assumes both outputs are views into one
    # (N, 2) buffer) — fragile across numpy versions. Stack explicitly instead.
    ys, xs = np.unravel_index(pos, shape)
    return np.column_stack((xs, ys))
def ravel_xy(pos, shape):
    """Quantize (x, y) positions to flat indices x + W*y, clipped into the image.

    ``pos`` is any (..., 2) float array of (x, y) coordinates; values are
    rounded, clipped to [0, W-1] x [0, H-1], and flattened row-major.
    """
    H, W = shape
    # NaNs may appear in invalid projections; silence the cast warning.
    with np.errstate(invalid="ignore"):
        xs, ys = pos.reshape(-1, 2).round().astype(np.int32).T
    np.clip(xs, 0, W - 1, out=xs)
    np.clip(ys, 0, H - 1, out=ys)
    return xs + W * ys
def extract_correspondences_from_pts3d(
    view1, view2, target_n_corres, rng=np.random, ret_xy=True, nneg=0
):
    """Extract pixel correspondences between two views via their 3D point maps.

    Each view is a dict providing at least the keys consumed by
    ``reproject_view``: ``pts3d``, ``camera_intrinsics`` and ``camera_pose``.

    Args:
        view1, view2: the two views (converted to numpy via ``to_numpy``).
        target_n_corres: total number of correspondences to return, or None
            to return every reciprocal match without sub-sampling.
        rng: random generator used for sub-sampling and negative sampling.
        ret_xy: if True return (x, y) pixel coordinates, else flat (x + W*y)
            indices.
        nneg: requested fraction of negative (false) correspondences.

    Returns:
        (pos1, pos2) when ``target_n_corres`` is None, otherwise
        (pos1, pos2, valid) where ``valid`` flags true correspondences.
    """
    view1, view2 = to_numpy((view1, view2))
    # project pixels from image1 --> 3d points --> image2 pixels
    shape1, corres1_to_2 = reproject_view(view1["pts3d"], view2)
    shape2, corres2_to_1 = reproject_view(view2["pts3d"], view1)
    # compute reciprocal correspondences:
    # pos1 == valid pixels (correspondences) in image1
    is_reciprocal1, pos1, pos2 = reciprocal_1d(
        corres1_to_2, corres2_to_1, ret_recip=True
    )
    # same reciprocity mask, but seen from image2's side
    is_reciprocal2 = corres1_to_2[corres2_to_1] == np.arange(len(corres2_to_1))
    if target_n_corres is None:
        # no budget requested: return all reciprocal matches as-is
        if ret_xy:
            pos1 = unravel_xy(pos1, shape1)
            pos2 = unravel_xy(pos2, shape2)
        return pos1, pos2
    # split the budget between positives and (nneg fraction of) negatives
    available_negatives = min((~is_reciprocal1).sum(), (~is_reciprocal2).sum())
    target_n_positives = int(target_n_corres * (1 - nneg))
    n_positives = min(len(pos1), target_n_positives)
    n_negatives = min(target_n_corres - n_positives, available_negatives)
    if n_negatives + n_positives != target_n_corres:
        # should be really rare => when there are not enough negatives
        # in that case, break nneg and add a few more positives ?
        n_positives = target_n_corres - n_negatives
        assert n_positives <= len(pos1)
    # sanity checks: the split must exactly fill the requested budget
    assert n_positives <= len(pos1)
    assert n_positives <= len(pos2)
    assert n_negatives <= (~is_reciprocal1).sum()
    assert n_negatives <= (~is_reciprocal2).sum()
    assert n_positives + n_negatives == target_n_corres
    valid = np.ones(n_positives, dtype=bool)
    if n_positives < len(pos1):
        # random sub-sampling of valid correspondences
        perm = rng.permutation(len(pos1))[:n_positives]
        pos1 = pos1[perm]
        pos2 = pos2[perm]
    if n_negatives > 0:
        # add false correspondences if not enough
        def norm(p):
            return p / p.sum()
        # negatives are drawn uniformly (per image) among non-reciprocal pixels
        pos1 = np.r_[
            pos1,
            rng.choice(
                shape1[0] * shape1[1],
                size=n_negatives,
                replace=False,
                p=norm(~is_reciprocal1),
            ),
        ]
        pos2 = np.r_[
            pos2,
            rng.choice(
                shape2[0] * shape2[1],
                size=n_negatives,
                replace=False,
                p=norm(~is_reciprocal2),
            ),
        ]
        valid = np.r_[valid, np.zeros(n_negatives, dtype=bool)]
    # convert (x+W*y) back to 2d (x,y) coordinates
    if ret_xy:
        pos1 = unravel_xy(pos1, shape1)
        pos2 = unravel_xy(pos2, shape2)
    return pos1, pos2, valid
def reproject_view(pts3d, view2):
    """Project ``pts3d`` into ``view2``'s image plane.

    Returns the target image shape and the flat pixel indices produced by
    ``reproject``.
    """
    target_shape = view2["pts3d"].shape[:2]
    world2cam = inv(view2["camera_pose"])
    return reproject(pts3d, view2["camera_intrinsics"], world2cam, target_shape)
def reproject(pts3d, K, world2cam, shape):
    """Project a (H, W, 3) point map through K @ world2cam and quantize to pixels.

    Returns ((H, W) of the input map, flat pixel indices into ``shape``).
    """
    H, W, channels = pts3d.shape
    assert channels == 3

    # reproject in camera2 space; invalid points may divide by zero
    projection = K @ world2cam[:3]
    with np.errstate(divide="ignore", invalid="ignore"):
        pix = geotrf(projection, pts3d, norm=1, ncol=2)

    # quantize to pixel positions
    return (H, W), ravel_xy(pix, shape)