"""
Author: yangyangyang127
Github: https://github.com/yangyangyang127
Repo: https://github.com/yangyangyang127/PointCLIP_V2
Path: https://github.com/yangyangyang127/PointCLIP_V2/blob/main/zeroshot_cls/trainers/mv_utils_zs.py#L135
"""
| |
|
| | import numpy as np |
| | import torch |
| | import torch.nn as nn |
| | from torch_scatter import scatter |
| |
|
| | TRANS = -1.5 |
| |
|
| | |
| | params = { |
| | "maxpoolz": 1, |
| | "maxpoolxy": 7, |
| | "maxpoolpadz": 0, |
| | "maxpoolpadxy": 2, |
| | "convz": 1, |
| | "convxy": 3, |
| | "convsigmaxy": 3, |
| | "convsigmaz": 1, |
| | "convpadz": 0, |
| | "convpadxy": 1, |
| | "imgbias": 0.0, |
| | "depth_bias": 0.2, |
| | "obj_ratio": 0.8, |
| | "bg_clr": 0.0, |
| | "resolution": 122, |
| | "depth": 8, |
| | "grid_height": 64, |
| | "grid_width": 64, |
| | } |
| |
|
| |
|
| | class Grid2Image(nn.Module): |
| | """A pytorch implementation to turn 3D grid to 2D image. |
| | Maxpool: densifying the grid |
| | Convolution: smoothing via Gaussian |
| | Maximize: squeezing the depth channel |
| | """ |
| |
|
| | def __init__(self): |
| | super().__init__() |
| | torch.backends.cudnn.benchmark = False |
| |
|
| | self.maxpool = nn.MaxPool3d( |
| | (params["maxpoolz"], params["maxpoolxy"], params["maxpoolxy"]), |
| | stride=1, |
| | padding=( |
| | params["maxpoolpadz"], |
| | params["maxpoolpadxy"], |
| | params["maxpoolpadxy"], |
| | ), |
| | ) |
| | self.conv = torch.nn.Conv3d( |
| | 1, |
| | 1, |
| | kernel_size=(params["convz"], params["convxy"], params["convxy"]), |
| | stride=1, |
| | padding=(params["convpadz"], params["convpadxy"], params["convpadxy"]), |
| | bias=True, |
| | ) |
| | kn3d = get3DGaussianKernel( |
| | params["convxy"], |
| | params["convz"], |
| | sigma=params["convsigmaxy"], |
| | zsigma=params["convsigmaz"], |
| | ) |
| | self.conv.weight.data = torch.Tensor(kn3d).repeat(1, 1, 1, 1, 1) |
| | self.conv.bias.data.fill_(0) |
| |
|
| | def forward(self, x): |
| | x = self.maxpool(x.unsqueeze(1)) |
| | x = self.conv(x) |
| | img = torch.max(x, dim=2)[0] |
| | img = img / torch.max(torch.max(img, dim=-1)[0], dim=-1)[0][:, :, None, None] |
| | img = 1 - img |
| | img = img.repeat(1, 3, 1, 1) |
| | return img |
| |
|
| |
|
| | def euler2mat(angle): |
| | """Convert euler angles to rotation matrix. |
| | :param angle: [3] or [b, 3] |
| | :return |
| | rotmat: [3] or [b, 3, 3] |
| | source |
| | https://github.com/ClementPinard/SfmLearner-Pytorch/blob/master/inverse_warp.py |
| | """ |
| | if len(angle.size()) == 1: |
| | x, y, z = angle[0], angle[1], angle[2] |
| | _dim = 0 |
| | _view = [3, 3] |
| | elif len(angle.size()) == 2: |
| | b, _ = angle.size() |
| | x, y, z = angle[:, 0], angle[:, 1], angle[:, 2] |
| | _dim = 1 |
| | _view = [b, 3, 3] |
| |
|
| | else: |
| | assert False |
| |
|
| | cosz = torch.cos(z) |
| | sinz = torch.sin(z) |
| |
|
| | |
| | |
| | zero = z.detach() * 0 |
| | one = zero.detach() + 1 |
| | zmat = torch.stack( |
| | [cosz, -sinz, zero, sinz, cosz, zero, zero, zero, one], dim=_dim |
| | ).reshape(_view) |
| |
|
| | cosy = torch.cos(y) |
| | siny = torch.sin(y) |
| |
|
| | ymat = torch.stack( |
| | [cosy, zero, siny, zero, one, zero, -siny, zero, cosy], dim=_dim |
| | ).reshape(_view) |
| |
|
| | cosx = torch.cos(x) |
| | sinx = torch.sin(x) |
| |
|
| | xmat = torch.stack( |
| | [one, zero, zero, zero, cosx, -sinx, zero, sinx, cosx], dim=_dim |
| | ).reshape(_view) |
| |
|
| | rot_mat = xmat @ ymat @ zmat |
| | |
| | return rot_mat |
| |
|
| |
|
| | def points_to_2d_grid( |
| | points, grid_h=params["grid_height"], grid_w=params["grid_width"] |
| | ): |
| | """ |
| | Converts a point cloud into a 2D grid based on X, Y coordinates. |
| | Points are projected onto a plane and quantized into grid cells. |
| | |
| | Args: |
| | points (torch.tensor): Tensor containing points, shape [B, P, 3] |
| | (B: batch size, P: number of points, 3: x, y, z coordinates) |
| | grid_h (int): Height of the output 2D grid. |
| | grid_w (int): Width of the output 2D grid. |
| | |
| | Returns: |
| | grid (torch.tensor): 2D grid representing the occupancy of points, |
| | shape [B, grid_h, grid_w]. |
| | Value 1.0 at cell (y, x) if at least one point falls into it, |
| | otherwise the background value (params["bg_clr"]). |
| | """ |
| | batch, pnum, _ = points.shape |
| | device = points.device |
| |
|
| | |
| | |
| | pmax_xy = points[:, :, :2].max(dim=1)[0] |
| | pmin_xy = points[:, :, :2].min(dim=1)[0] |
| |
|
| | |
| | pcent_xy = (pmax_xy + pmin_xy) / 2 |
| | pcent_xy = pcent_xy[:, None, :] |
| |
|
| | |
| | prange_xy = (pmax_xy - pmin_xy).max(dim=-1)[0][:, None, None] |
| |
|
| | |
| | epsilon = 1e-8 |
| | |
| | points_normalized_xy = (points[:, :, :2] - pcent_xy) / (prange_xy + epsilon) * 2.0 |
| |
|
| | |
| | points_normalized_xy = points_normalized_xy * params["obj_ratio"] |
| |
|
| | |
| | |
| | |
| | |
| | _x = ( |
| | (points_normalized_xy[:, :, 0] + params["obj_ratio"]) |
| | / (2 * params["obj_ratio"]) |
| | * grid_w |
| | ) |
| | _y = ( |
| | (points_normalized_xy[:, :, 1] + params["obj_ratio"]) |
| | / (2 * params["obj_ratio"]) |
| | * grid_h |
| | ) |
| |
|
| | |
| | _x = torch.floor(_x).long() |
| | _y = torch.floor(_y).long() |
| |
|
| | |
| | |
| | |
| | _x = torch.clip(_x, 0, grid_w - 1) |
| | _y = torch.clip(_y, 0, grid_h - 1) |
| |
|
| | |
| | |
| | grid = torch.full( |
| | (batch, grid_h, grid_w), params["bg_clr"], dtype=torch.float32, device=device |
| | ) |
| |
|
| | |
| | batch_indices = torch.arange(batch, device=device).view(-1, 1).repeat(1, pnum) |
| |
|
| | |
| | batch_idx_flat = batch_indices.view(-1) |
| | y_idx_flat = _y.view(-1) |
| | x_idx_flat = _x.view(-1) |
| |
|
| | |
| | |
| | grid[batch_idx_flat, y_idx_flat, x_idx_flat] = 1.0 |
| |
|
| | return grid |
| |
|
| |
|
| | def points2grid(points, resolution=params["resolution"], depth=params["depth"]): |
| | """Quantize each point cloud to a 3D grid. |
| | Args: |
| | points (torch.tensor): of size [B, _, 3] |
| | Returns: |
| | grid (torch.tensor): of size [B * self.num_views, depth, resolution, resolution] |
| | """ |
| |
|
| | batch, pnum, _ = points.shape |
| |
|
| | pmax, pmin = points.max(dim=1)[0], points.min(dim=1)[0] |
| | pcent = (pmax + pmin) / 2 |
| | pcent = pcent[:, None, :] |
| | prange = (pmax - pmin).max(dim=-1)[0][:, None, None] |
| | points = (points - pcent) / prange * 2.0 |
| | points[:, :, :2] = points[:, :, :2] * params["obj_ratio"] |
| |
|
| | depth_bias = params["depth_bias"] |
| | _x = (points[:, :, 0] + 1) / 2 * resolution |
| | _y = (points[:, :, 1] + 1) / 2 * resolution |
| | _z = ((points[:, :, 2] + 1) / 2 + depth_bias) / (1 + depth_bias) * (depth - 2) |
| |
|
| | _x.ceil_() |
| | _y.ceil_() |
| | z_int = _z.ceil() |
| |
|
| | _x = torch.clip(_x, 1, resolution - 2) |
| | _y = torch.clip(_y, 1, resolution - 2) |
| | _z = torch.clip(_z, 1, depth - 2) |
| |
|
| | coordinates = z_int * resolution * resolution + _y * resolution + _x |
| | grid = ( |
| | torch.ones([batch, depth, resolution, resolution], device=points.device).view( |
| | batch, -1 |
| | ) |
| | * params["bg_clr"] |
| | ) |
| |
|
| | grid = scatter(_z, coordinates.long(), dim=1, out=grid, reduce="max") |
| | grid = grid.reshape((batch, depth, resolution, resolution)).permute((0, 1, 3, 2)) |
| |
|
| | return grid |
| |
|
| |
|
| | def points_to_occupancy_grid( |
| | points, resolution=params["resolution"], depth=params["depth"] |
| | ): |
| | """Quantize each point cloud into a 3D occupancy grid.""" |
| |
|
| | batch, pnum, _ = points.shape |
| | device = points.device |
| |
|
| | |
| | pmax, pmin = points.max(dim=1)[0], points.min(dim=1)[0] |
| | pcent = (pmax + pmin) / 2 |
| | pcent = pcent[:, None, :] |
| | prange = (pmax - pmin).max(dim=-1)[0][ |
| | :, None, None |
| | ] + 1e-8 |
| | points_norm = (points - pcent) / prange * 2.0 |
| | points_norm[:, :, :2] = points_norm[:, :, :2] * params["obj_ratio"] |
| |
|
| | depth_bias = params["depth_bias"] |
| | _x = (points_norm[:, :, 0] + 1) / 2 * resolution |
| | _y = (points_norm[:, :, 1] + 1) / 2 * resolution |
| | _z = ((points_norm[:, :, 2] + 1) / 2 + depth_bias) / (1 + depth_bias) * (depth - 2) |
| |
|
| | _x.ceil_() |
| | _y.ceil_() |
| | z_int = _z.ceil() |
| |
|
| | _x = torch.clip(_x, 1, resolution - 2) |
| | _y = torch.clip(_y, 1, resolution - 2) |
| | |
| | z_int = torch.clip(z_int, 1, depth - 2) |
| |
|
| | |
| | coordinates = z_int * resolution * resolution + _y * resolution + _x |
| | coordinates = coordinates.long() |
| |
|
| | |
| | |
| | |
| | bg_clr_value = params.get("bg_clr", 0.0) |
| | grid = torch.full( |
| | (batch, depth * resolution * resolution), |
| | bg_clr_value, |
| | dtype=torch.float32, |
| | device=device, |
| | ) |
| |
|
| | |
| | |
| | values_to_scatter = torch.ones(batch * pnum, dtype=torch.float32, device=device) |
| |
|
| | |
| | |
| | |
| | |
| | |
| | if bg_clr_value != 0.0: |
| | print( |
| | "Warning: bg_clr is not 0.0, occupancy grid might not be strictly binary 0/1 with reduce='max'. Consider initializing grid with 0." |
| | ) |
| |
|
| | grid = scatter( |
| | values_to_scatter, |
| | coordinates.view(-1), |
| | dim=0, |
| | out=grid.view(-1), |
| | reduce="max", |
| | ) |
| |
|
| | |
| | |
| | |
| | grid = grid.view(batch, depth, resolution, resolution) |
| | grid = grid.permute((0, 1, 3, 2)) |
| |
|
| | return grid |
| |
|
| |
|
| | class Realistic_Projection: |
| | """For creating images from PC based on the view information.""" |
| |
|
| | def __init__(self): |
| | _views = np.asarray([ |
| | [[1 * np.pi / 4, 0, np.pi / 2], [-0.5, -0.5, TRANS]], |
| | [[3 * np.pi / 4, 0, np.pi / 2], [-0.5, -0.5, TRANS]], |
| | [[5 * np.pi / 4, 0, np.pi / 2], [-0.5, -0.5, TRANS]], |
| | [[7 * np.pi / 4, 0, np.pi / 2], [-0.5, -0.5, TRANS]], |
| | [[0 * np.pi / 2, 0, np.pi / 2], [-0.5, -0.5, TRANS]], |
| | [[1 * np.pi / 2, 0, np.pi / 2], [-0.5, -0.5, TRANS]], |
| | [[2 * np.pi / 2, 0, np.pi / 2], [-0.5, -0.5, TRANS]], |
| | [[3 * np.pi / 2, 0, np.pi / 2], [-0.5, -0.5, TRANS]], |
| | [[0, -np.pi / 2, np.pi / 2], [-0.5, -0.5, TRANS]], |
| | [[0, np.pi / 2, np.pi / 2], [-0.5, -0.5, TRANS]], |
| | ]) |
| |
|
| | |
| | _views_bias = np.asarray([ |
| | [[0, np.pi / 9, 0], [-0.5, 0, TRANS]], |
| | [[0, np.pi / 9, 0], [-0.5, 0, TRANS]], |
| | [[0, np.pi / 9, 0], [-0.5, 0, TRANS]], |
| | [[0, np.pi / 9, 0], [-0.5, 0, TRANS]], |
| | [[0, np.pi / 9, 0], [-0.5, 0, TRANS]], |
| | [[0, np.pi / 9, 0], [-0.5, 0, TRANS]], |
| | [[0, np.pi / 9, 0], [-0.5, 0, TRANS]], |
| | [[0, np.pi / 9, 0], [-0.5, 0, TRANS]], |
| | [[0, np.pi / 15, 0], [-0.5, 0, TRANS]], |
| | [[0, np.pi / 15, 0], [-0.5, 0, TRANS]], |
| | ]) |
| |
|
| | self.num_views = _views.shape[0] |
| |
|
| | angle = torch.tensor(_views[:, 0, :]).float() |
| | self.rot_mat = euler2mat(angle).transpose(1, 2) |
| | angle2 = torch.tensor(_views_bias[:, 0, :]).float() |
| | self.rot_mat2 = euler2mat(angle2).transpose(1, 2) |
| |
|
| | self.translation = torch.tensor(_views[:, 1, :]).float() |
| | self.translation = self.translation.unsqueeze(1) |
| |
|
| | self.grid2image = Grid2Image() |
| |
|
| | def get_img(self, points): |
| | b, _, _ = points.shape |
| | v = self.translation.shape[0] |
| |
|
| | _points = self.point_transform( |
| | points=torch.repeat_interleave(points, v, dim=0), |
| | rot_mat=self.rot_mat.repeat(b, 1, 1), |
| | rot_mat2=self.rot_mat2.repeat(b, 1, 1), |
| | translation=self.translation.repeat(b, 1, 1), |
| | ) |
| |
|
| | grid = points2grid( |
| | points=_points, resolution=params["resolution"], depth=params["depth"] |
| | ).squeeze() |
| | img = self.grid2image(grid) |
| | return img |
| |
|
| | @staticmethod |
| | def point_transform(points, rot_mat, rot_mat2, translation): |
| | """ |
| | :param points: [batch, num_points, 3] |
| | :param rot_mat: [batch, 3] |
| | :param rot_mat2: [batch, 3] |
| | :param translation: [batch, 1, 3] |
| | :return: |
| | """ |
| | rot_mat = rot_mat.to(points.device) |
| | rot_mat2 = rot_mat2.to(points.device) |
| | translation = translation.to(points.device) |
| | points = torch.matmul(points, rot_mat) |
| | points = torch.matmul(points, rot_mat2) |
| | points = points - translation |
| | return points |
| |
|
| |
|
| | def get2DGaussianKernel(ksize, sigma=0): |
| | center = ksize // 2 |
| | xs = np.arange(ksize, dtype=np.float32) - center |
| | kernel1d = np.exp(-(xs**2) / (2 * sigma**2)) |
| | kernel = kernel1d[..., None] @ kernel1d[None, ...] |
| | kernel = torch.from_numpy(kernel) |
| | kernel = kernel / kernel.sum() |
| | return kernel |
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| |
|
| | def get3DGaussianKernel(ksize, depth, sigma=2, zsigma=2): |
| | kernel2d = get2DGaussianKernel(ksize, sigma) |
| | zs = np.arange(depth, dtype=np.float32) - depth // 2 |
| | zkernel = np.exp(-(zs**2) / (2 * zsigma**2)) |
| | kernel3d = np.repeat(kernel2d[None, :, :], depth, axis=0) * zkernel[:, None, None] |
| | kernel3d = kernel3d / torch.sum(kernel3d) |
| | return kernel3d |
| |
|