Add left-right disparity consistency check to save_disp.py

14192524 · huangzhuofei · 2cb08137 · 14192524
--- a/save_disp.py
+++ b/save_disp.py
@@ -16,11 +16,12 @@ import skimage.io
 import cv2
 import open3d as o3d
 import json
+from scipy import interpolate


-DEVICE = 'cuda'
+DEVICE = 'cuda:2'

-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+# os.environ['CUDA_VISIBLE_DEVICES'] = '2'

 def load_image(imfile, scale):
    img = np.array(Image.open(imfile)).astype(np.uint8)
@@ -28,8 +29,43 @@ def load_image(imfile, scale):
    img = torch.from_numpy(img).permute(2, 0, 1).float()
    return img[None].to(DEVICE)

+def forward_interpolate(dx: np.ndarray) -> np.ndarray:  # dx: 2-D (H, W) horizontal displacement map
+    img_h, img_w = dx.shape
+    x0, y0 = np.meshgrid(np.arange(img_w), np.arange(img_h))  # regular pixel grid (columns, rows)
+    x1 = x0 + dx  # column each pixel lands on after shifting by its own displacement
+    y1 = y0.copy()  # rows are unchanged: the shift is horizontal only
+    # Keep only pixels whose shifted column still falls inside the image width.
+    valid = (x1 >= 0) & (x1 < img_w)
+    # Flatten everything so the boolean mask can select the scattered samples.
+    x1 = x1.reshape(-1)
+    y1 = y1.reshape(-1)
+    dx = dx.reshape(-1)
+    valid = valid.reshape(-1)
+    # Drop samples that were shifted outside the image.
+    x1 = x1[valid]
+    y1 = y1[valid]
+    dx = dx[valid]
+    # Resample the scattered (x1, y1) -> dx samples back onto the regular pixel grid.
+    flow_x = interpolate.griddata((x1, y1),
+                                    dx, (x0, y0),
+                                    method='nearest',
+                                    fill_value=0)  # NOTE(review): SciPy documents fill_value as having no effect for 'nearest'
+    # Values are sampled at (x0, y0), so the result has the grid's (H, W) shape.
+    return flow_x
+
+def cal_consist_map(left_disp: torch.Tensor,
+                    right_disp: torch.Tensor) -> np.ndarray:
+    # left_disp, right_disp: [B, 1, H, W]; squeeze() must leave a 2-D map (forward_interpolate unpacks exactly two dims)
+    ld = left_disp.detach().cpu().numpy().squeeze()  # left disparity as a NumPy array
+    rd = right_disp.detach().cpu().numpy().squeeze()  # right disparity as a NumPy array
+    # Warp the right disparity into the left view by shifting each pixel by its own disparity.
+    proj_rld = forward_interpolate(rd)
+    consist_map = np.abs(proj_rld - ld)  # per-pixel absolute left/right disagreement
+    return consist_map
+
+
 def demo(args):
-    model = torch.nn.DataParallel(IGEVStereo(args), device_ids=[0])
+    model = torch.nn.DataParallel(IGEVStereo(args))
    model.load_state_dict(torch.load(args.restore_ckpt))

    model = model.module
@@ -50,8 +86,23 @@ def demo(args):
            image2 = load_image(imfile2, scale)
            padder = InputPadder(image1.shape, divis_by=32)
            image1, image2 = padder.pad(image1, image2)
-            disp = model(image1, image2, iters=args.valid_iters, test_mode=True)
+            if args.use_init:
+                disp, _ = model(image1, image2, iters=args.valid_iters, test_mode=False)
+            else:
+                disp = model(image1, image2, iters=args.valid_iters, test_mode=True)
            disp = padder.unpad(disp)
+
+            # consistency
+            flip_right = torch.flip(image2, [2, 3])
+            flip_left = torch.flip(image1, [2, 3])
+            if args.use_init:
+                flip_results, _ = model(flip_right, flip_left, iters=args.valid_iters, test_mode=False)
+            else:
+                flip_results = model(flip_right, flip_left, iters=args.valid_iters, test_mode=True)
+            flip_right_disp_pred = padder.unpad(flip_results)
+            right_disp_pred = torch.flip(flip_right_disp_pred, [2, 3])
+            conf = cal_consist_map(disp, right_disp_pred) < 0.5
+
            file_stem = os.path.join(output_directory, imfile1.split('/')[-2])
            disp = disp.cpu().numpy().squeeze()
            if args.save_png:
@@ -78,10 +129,11 @@ def demo(args):
            img_h, img_w, _ = left_img.shape

            depth_img = fx * baseline / disp
-            # filter black rectify area
-            gray = cv2.cvtColor(left_img, cv2.COLOR_BGR2GRAY)
-            valid_rect = gray > 0
-            depth_img[valid_rect == 0] = 0
+            # # filter black rectify area
+            # gray = cv2.cvtColor(left_img, cv2.COLOR_BGR2GRAY)
+            # valid_rect = gray > 0
+            # depth_img[valid_rect == 0] = 0
+            depth_img[conf == 0] = 0

            rgb_img_o3d = o3d.geometry.Image(left_img.astype(np.uint8))
            depth_img_o3d = o3d.geometry.Image(depth_img.astype(np.float32))
@@ -95,16 +147,24 @@ def demo(args):
            )
            pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_img, intrinsic)
            o3d.io.write_point_cloud(file_stem + ".ply", pcd)
+            # import ipdb;ipdb.set_trace()

 if __name__ == '__main__':
    parser = argparse.ArgumentParser()
-    parser.add_argument('--restore_ckpt', help="restore checkpoint")
+    parser.add_argument('--restore_ckpt', help="restore checkpoint", default="/root/workspace/IGEV/checkpoints/sceneflow_no_pe/igev-stereo.pth")
+    parser.add_argument('--use_init', action='store_true', default=False, help='whether apply init disp')
+    parser.add_argument('--use_pe', action='store_true', default=False, help='whether apply position encoding')
+    parser.add_argument('--separate_mlp', action='store_true', default=False, help='whether apply separate MLP for stereo inputs')
+    parser.add_argument('--hid_feat_pe', action='store_true', default=False, help='whether apply high frequency on hidden layers')
+    parser.add_argument('--inp_feat_pe', action='store_true', default=False, help='whether apply high frequency on input layers')
+    parser.add_argument('--use_dinov2', action='store_true', default=False, help='whether apply DINOV2')
+
    parser.add_argument('--save_png', action='store_true', default=False, help='save output as gray images')
    parser.add_argument('--save_numpy', action='store_true', help='save output as numpy arrays')
-    parser.add_argument('-l', '--left_imgs', help="path to all first (left) frames", default="/root/workspace/pickwiz_data/*/rect_left.bmp")
-    parser.add_argument('-r', '--right_imgs', help="path to all second (right) frames", default="/root/workspace/pickwiz_data/*/rect_right.bmp")
+    parser.add_argument('-l', '--left_imgs', help="path to all first (left) frames", default="/root/workspace/stereo_data/unlabel/scene_000013/*/rect_left.bmp")
+    parser.add_argument('-r', '--right_imgs', help="path to all second (right) frames", default="/root/workspace/stereo_data/unlabel/scene_000013/*/rect_right.bmp")
    parser.add_argument('--scale', type=float, default=1, help='scale of test images')
-    parser.add_argument('--output_directory', help="directory to save output", default="/root/workspace/pickwiz_data/output_no_pe")
+    parser.add_argument('--output_directory', help="directory to save output", default="/root/workspace/stereo_data/consistency")
    parser.add_argument('--mixed_precision', action='store_true', default=True, help='use mixed precision')
    parser.add_argument('--valid_iters', type=int, default=32, help='number of flow-field updates during forward pass')

@@ -114,7 +174,7 @@ if __name__ == '__main__':
    parser.add_argument('--corr_radius', type=int, default=4, help="width of the correlation pyramid")
    parser.add_argument('--n_downsample', type=int, default=2, help="resolution of the disparity field (1/2^K)")
    parser.add_argument('--n_gru_layers', type=int, default=3, help="number of hidden GRU levels")
-    parser.add_argument('--max_disp', type=int, default=256, help="max disp range")
+    parser.add_argument('--max_disp', type=int, default=384, help="max disp range")
    
    args = parser.parse_args()