format and fix (works on my machine broooo)

kayos@tcp.direct 2023-05-30 18:23:02 -07:00
parent 345c3b8c55
commit 30eb7242e2
Signed by: kayos
GPG Key ID: 4B841471B4BEE979
13 changed files with 3981 additions and 1651 deletions

.gitignore (vendored), 7 changes

@@ -128,3 +128,10 @@ dmypy.json
# Pyre type checker
.pyre/
Output/*
video/*
mesh/*
Inpainting/checkpoints/*.pth
Inpainting/MiDaS/model.pt

@@ -3,7 +3,9 @@
# print('Depth Estimation Done!')
import os
import sys
from Inpainting import main
print('inpainting started')
for i in os.listdir("Input"):
for i in os.listdir(sys.argv[1]):
print(f'processing {i}')
main.inpaint(i)
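For context, the new entry point is just a loop over whatever directory is passed as the first argument, handing each file name to `Inpainting.main.inpaint`. A minimal sketch of that loop follows; the argument check and usage message are additions for illustration, not part of this commit:

```python
# sketch of the new 3dphotography.py driver loop, with a guard added for a missing argument
import os
import sys

from Inpainting import main

if len(sys.argv) < 2 or not os.path.isdir(sys.argv[1]):
    sys.exit("usage: python3 3dphotography.py /path/to/images")

print("inpainting started")
for name in os.listdir(sys.argv[1]):
    print(f"processing {name}")
    main.inpaint(name)
```

Invoked the way the README now suggests: `python3 3dphotography.py /path/to/images`.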

@@ -8,8 +8,7 @@ from torchvision import models
class MonoDepthNet(nn.Module):
"""Network for monocular depth estimation.
"""
"""Network for monocular depth estimation."""
def __init__(self, path=None, features=256):
"""Init.
@@ -24,18 +23,27 @@ class MonoDepthNet(nn.Module):
self.pretrained = nn.Module()
self.scratch = nn.Module()
self.pretrained.layer1 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu,
resnet.maxpool, resnet.layer1)
self.pretrained.layer1 = nn.Sequential(
resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool, resnet.layer1
)
self.pretrained.layer2 = resnet.layer2
self.pretrained.layer3 = resnet.layer3
self.pretrained.layer4 = resnet.layer4
# adjust channel number of feature maps
self.scratch.layer1_rn = nn.Conv2d(256, features, kernel_size=3, stride=1, padding=1, bias=False)
self.scratch.layer2_rn = nn.Conv2d(512, features, kernel_size=3, stride=1, padding=1, bias=False)
self.scratch.layer3_rn = nn.Conv2d(1024, features, kernel_size=3, stride=1, padding=1, bias=False)
self.scratch.layer4_rn = nn.Conv2d(2048, features, kernel_size=3, stride=1, padding=1, bias=False)
self.scratch.layer1_rn = nn.Conv2d(
256, features, kernel_size=3, stride=1, padding=1, bias=False
)
self.scratch.layer2_rn = nn.Conv2d(
512, features, kernel_size=3, stride=1, padding=1, bias=False
)
self.scratch.layer3_rn = nn.Conv2d(
1024, features, kernel_size=3, stride=1, padding=1, bias=False
)
self.scratch.layer4_rn = nn.Conv2d(
2048, features, kernel_size=3, stride=1, padding=1, bias=False
)
self.scratch.refinenet4 = FeatureFusionBlock(features)
self.scratch.refinenet3 = FeatureFusionBlock(features)
@@ -43,12 +51,15 @@ class MonoDepthNet(nn.Module):
self.scratch.refinenet1 = FeatureFusionBlock(features)
# adaptive output module: 2 convolutions and upsampling
self.scratch.output_conv = nn.Sequential(nn.Conv2d(features, 128, kernel_size=3, stride=1, padding=1),
nn.Conv2d(128, 1, kernel_size=3, stride=1, padding=1),
Interpolate(scale_factor=2, mode='bilinear'))
self.scratch.output_conv = nn.Sequential(
nn.Conv2d(features, 128, kernel_size=3, stride=1, padding=1),
nn.Conv2d(128, 1, kernel_size=3, stride=1, padding=1),
Interpolate(scale_factor=2, mode="bilinear"),
)
# load model
if path:
print(f"loading model for monodepthnet from path {path}")
self.load(path)
def forward(self, x):
@@ -91,8 +102,7 @@ class MonoDepthNet(nn.Module):
class Interpolate(nn.Module):
"""Interpolation module.
"""
"""Interpolation module."""
def __init__(self, scale_factor, mode):
"""Init.
@@ -116,14 +126,15 @@ class Interpolate(nn.Module):
Returns:
tensor: interpolated data
"""
x = self.interp(x, scale_factor=self.scale_factor, mode=self.mode, align_corners=False)
x = self.interp(
x, scale_factor=self.scale_factor, mode=self.mode, align_corners=False
)
return x
class ResidualConvUnit(nn.Module):
"""Residual convolution module.
"""
"""Residual convolution module."""
def __init__(self, features):
"""Init.
@@ -133,8 +144,12 @@ class ResidualConvUnit(nn.Module):
"""
super().__init__()
self.conv1 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=True)
self.conv2 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=False)
self.conv1 = nn.Conv2d(
features, features, kernel_size=3, stride=1, padding=1, bias=True
)
self.conv2 = nn.Conv2d(
features, features, kernel_size=3, stride=1, padding=1, bias=False
)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
@@ -155,8 +170,7 @@ class ResidualConvUnit(nn.Module):
class FeatureFusionBlock(nn.Module):
"""Feature fusion block.
"""
"""Feature fusion block."""
def __init__(self, features):
"""Init.
@@ -180,7 +194,8 @@ class FeatureFusionBlock(nn.Module):
output += self.resConfUnit(xs[1])
output = self.resConfUnit(output)
output = nn.functional.interpolate(output, scale_factor=2,
mode='bilinear', align_corners=True)
output = nn.functional.interpolate(
output, scale_factor=2, mode="bilinear", align_corners=True
)
return output
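Aside from the new load-message print, these hunks are a Black-style reflow of `monodepth_net.py`, so behavior should be unchanged. A rough smoke test of the untouched forward path is sketched below; the 384x384 input size is an assumption (the usual MiDaS resolution), and instantiating without a checkpoint may still pull torchvision's ResNet-50 weights depending on how the backbone is constructed:

```python
# hedged smoke test: run MonoDepthNet on a dummy RGB tensor without loading model.pt
import torch

from Inpainting.MiDaS.monodepth_net import MonoDepthNet

net = MonoDepthNet(path=None, features=256)  # no checkpoint: backbone weights only
net.eval()

with torch.no_grad():
    dummy = torch.zeros(1, 3, 384, 384)  # one fake 384x384 RGB frame (assumed size)
    depth = net(dummy)

print(depth.shape)  # expected: a single-channel inverse-depth map near input resolution
```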

@@ -22,7 +22,7 @@ def run_depth(img_names, input_path, output_path, model_path, Net, utils, target
print("initialize")
# select device
device = torch.device("cpu")
device = torch.device("cuda:0")
print("device: %s" % device)
# load network
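This hunk swaps the MiDaS device from CPU to `cuda:0`, so depth extraction now assumes a visible GPU. If that assumption is a problem, a conditional fallback (not part of this commit) is the usual pattern:

```python
# hedged alternative to the hard-coded cuda:0: use the GPU when present, else CPU
import torch

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device: %s" % device)
```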

@@ -4,7 +4,11 @@ background_thickness: 70
background_thickness_2: 70
context_thickness: 140
context_thickness_2: 70
crop_border: [0.03, 0.03, 0.05, 0.03]
crop_border:
- 0.03
- 0.03
- 0.05
- 0.03
depth_edge_dilate: 10
depth_edge_dilate_2: 5
depth_edge_model_ckpt: Inpainting/checkpoints/edge-model.pth
@@ -16,8 +20,13 @@ discount_factor: 1.0
ext_edge_threshold: 0.002
extrapolate_border: true
extrapolation_thickness: 60
filter_size: [7, 7, 5, 5, 5]
fps: 40
filter_size:
- 7
- 7
- 5
- 5
- 5
fps: 60
gpu_ids: 0
img_format: .jpg
inference_video: true
@@ -26,21 +35,41 @@ load_ply: false
log_depth: true
longer_side_len: 1280
mesh_folder: mesh
num_frames: 400
num_frames: 600
offscreen_rendering: false
redundant_number: 12
repeat_inpaint_edge: true
require_midas: true
rgb_feat_model_ckpt: Inpainting/checkpoints/color-model.pth
save_ply: false
save_ply: true
sigma_r: 0.5
sigma_s: 4.0
sparse_iter: 5
specific: ''
src_folder: Input
traj_types: [double-straight-line, double-straight-line, circle, circle]
specific: f3213296936
src_folder: /media/l/depth/
traj_types:
- double-straight-line
- double-straight-line
- circle
- circle
video_folder: video
video_postfix: [dolly-zoom-in, zoom-in, circle, swing]
x_shift_range: [0.0, 0.0, -0.02, -0.02]
y_shift_range: [0.0, 0.0, -0.02, -0.0]
z_shift_range: [-0.05, -0.05, -0.07, -0.07]
video_postfix:
- dolly-zoom-in
- zoom-in
- circle
- swing
x_shift_range:
- 0.0
- 0.0
- -0.02
- -0.02
y_shift_range:
- 0.0
- 0.0
- -0.02
- -0.0
z_shift_range:
- -0.05
- -0.05
- -0.07
- -0.07
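The reflowed lists above are consumed index by index: the first entry of `traj_types`, `video_postfix`, and each shift range describes one rendered camera trajectory, the second entry the next, and so on. A small sketch that prints that pairing; the use of `yaml.safe_load` here is an assumption (the code in this commit loads the file with `UnsafeLoader`):

```python
# hedged illustration: each list index in argument.yml defines one rendered trajectory
import yaml

with open("Inpainting/argument.yml") as f:
    cfg = yaml.safe_load(f)

for traj, postfix, dx, dy, dz in zip(
    cfg["traj_types"], cfg["video_postfix"],
    cfg["x_shift_range"], cfg["y_shift_range"], cfg["z_shift_range"],
):
    print(f"{postfix}: {traj}, shift=({dx}, {dy}, {dz})")
```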

@@ -19,154 +19,238 @@ import imageio
import copy
from Inpainting.networks import Inpaint_Color_Net, Inpaint_Depth_Net, Inpaint_Edge_Net
from Inpainting.MiDaS.run import run_depth
from Inpainting.MiDaS.monodepth_net import MonoDepthNet # model to compute depth
from Inpainting.MiDaS.monodepth_net import MonoDepthNet # model to compute depth
import Inpainting.MiDaS.MiDaS_utils as MiDaS_utils
from Inpainting.bilateral_filtering import sparse_bilateral_filtering
import yaml
import subprocess
def inpaint(file_name):
subprocess.call(["sed -i 's/offscreen_rendering: True/offscreen_rendering: False/g' Inpainting/argument.yml"],shell=True)
subprocess.call(
[
"sed -i 's/offscreen_rendering: True/offscreen_rendering: False/g' Inpainting/argument.yml"
],
shell=True,
)
with open("Inpainting/argument.yml") as f:
list_doc = yaml.load(f)
argtarget = "Inpainting/argument.yml"
print(f"reading {argtarget} for arguments...")
with open(argtarget) as f:
ybytes = f.read()
list_doc = yaml.load(ybytes, yaml.UnsafeLoader)
f.close()
list_doc['src_folder'] = 'Input'
list_doc['depth_folder'] = 'Output'
list_doc['require_midas'] = True
list_doc["src_folder"] = sys.argv[1]
list_doc["depth_folder"] = "Output"
list_doc["require_midas"] = True
list_doc['specific'] = file_name.split('.')[0]
list_doc["specific"] = file_name.split(".")[0]
with open("Inpainting/argument.yml", "w") as f:
yaml.dump(list_doc, f)
with open(argtarget, "w") as f:
yaml.dump(list_doc, f)
# command line arguments
config = yaml.load(open('Inpainting/argument.yml', 'r'))
if config['offscreen_rendering'] is True:
vispy.use(app='egl')
# command line arguments
with open(argtarget, "r") as f:
config = yaml.load(f, yaml.UnsafeLoader)
f.close()
# create some directories
os.makedirs(config['mesh_folder'], exist_ok=True)
os.makedirs(config['video_folder'], exist_ok=True)
os.makedirs(config['depth_folder'], exist_ok=True)
sample_list = get_MiDaS_samples(config['src_folder'], config['depth_folder'], config, config['specific']) # dict of important stuffs
normal_canvas, all_canvas = None, None
if config["offscreen_rendering"] is True:
vispy.use(app="egl")
# find device
if isinstance(config["gpu_ids"], int) and (config["gpu_ids"] >= 0):
device = config["gpu_ids"]
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
else:
device = "cpu"
# create some directories
os.makedirs(config["mesh_folder"], exist_ok=True)
os.makedirs(config["video_folder"], exist_ok=True)
os.makedirs(config["depth_folder"], exist_ok=True)
sample_list = get_MiDaS_samples(
config["src_folder"], config["depth_folder"], config, config["specific"]
) # dict of important stuffs
normal_canvas, all_canvas = None, None
print(f"running on device {device}")
# find device
if isinstance(config["gpu_ids"], int) and (config["gpu_ids"] >= 0):
device = config["gpu_ids"]
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
else:
device = "0"
# iterate over each image.
for idx in tqdm(range(len(sample_list))):
depth = None
sample = sample_list[idx] # select image
print("Current Source ==> ", sample['src_pair_name'])
mesh_fi = os.path.join(config['mesh_folder'], sample['src_pair_name'] +'.ply')
image = imageio.imread(sample['ref_img_fi'])
print(f"running on device {device}")
print(f"Running depth extraction at {time.time()}")
if config['require_midas'] is True:
run_depth([sample['ref_img_fi']], config['src_folder'], config['depth_folder'], # compute depth
config['MiDaS_model_ckpt'], MonoDepthNet, MiDaS_utils, target_w=1280)
if 'npy' in config['depth_format']:
config['output_h'], config['output_w'] = np.load(sample['depth_fi']).shape[:2]
else:
config['output_h'], config['output_w'] = imageio.imread(sample['depth_fi']).shape[:2]
# iterate over each image.
for idx in tqdm(range(len(sample_list))):
depth = None
sample = sample_list[idx] # select image
print("Current Source ==> ", sample["src_pair_name"])
mesh_fi = os.path.join(config["mesh_folder"], sample["src_pair_name"] + ".ply")
image = imageio.imread(sample["ref_img_fi"])
frac = config['longer_side_len'] / max(config['output_h'], config['output_w'])
config['output_h'], config['output_w'] = int(config['output_h'] * frac), int(config['output_w'] * frac)
config['original_h'], config['original_w'] = config['output_h'], config['output_w']
if image.ndim == 2:
image = image[..., None].repeat(3, -1)
if np.sum(np.abs(image[..., 0] - image[..., 1])) == 0 and np.sum(np.abs(image[..., 1] - image[..., 2])) == 0:
config['gray_image'] = True
else:
config['gray_image'] = False
print(f"Running depth extraction at {time.time()}")
if config["require_midas"] is True:
run_depth(
[sample["ref_img_fi"]],
config["src_folder"],
config["depth_folder"], # compute depth
config["MiDaS_model_ckpt"],
MonoDepthNet,
MiDaS_utils,
target_w=1280,
)
if "npy" in config["depth_format"]:
config["output_h"], config["output_w"] = np.load(sample["depth_fi"]).shape[
:2
]
else:
config["output_h"], config["output_w"] = imageio.imread(
sample["depth_fi"]
).shape[:2]
image = cv2.resize(image, (config['output_w'], config['output_h']), interpolation=cv2.INTER_AREA)
frac = config["longer_side_len"] / max(config["output_h"], config["output_w"])
config["output_h"], config["output_w"] = int(config["output_h"] * frac), int(
config["output_w"] * frac
)
config["original_h"], config["original_w"] = (
config["output_h"],
config["output_w"],
)
if image.ndim == 2:
image = image[..., None].repeat(3, -1)
if (
np.sum(np.abs(image[..., 0] - image[..., 1])) == 0
and np.sum(np.abs(image[..., 1] - image[..., 2])) == 0
):
config["gray_image"] = True
else:
config["gray_image"] = False
depth = read_MiDaS_depth(sample['depth_fi'], 3.0, config['output_h'], config['output_w']) # read normalized depth computed
image = cv2.resize(
image,
(config["output_w"], config["output_h"]),
interpolation=cv2.INTER_AREA,
)
mean_loc_depth = depth[depth.shape[0]//2, depth.shape[1]//2]
depth = read_MiDaS_depth(
sample["depth_fi"], 3.0, config["output_h"], config["output_w"]
) # read normalized depth computed
if not(config['load_ply'] is True and os.path.exists(mesh_fi)):
vis_photos, vis_depths = sparse_bilateral_filtering(depth.copy(), image.copy(), config, num_iter=config['sparse_iter'], spdb=False) # do bilateral filtering
depth = vis_depths[-1]
model = None
torch.cuda.empty_cache()
## MODEL INITS
mean_loc_depth = depth[depth.shape[0] // 2, depth.shape[1] // 2]
print("Start Running 3D_Photo ...")
print(f"Loading edge model at {time.time()}")
depth_edge_model = Inpaint_Edge_Net(init_weights=True) # init edge inpainting model
depth_edge_weight = torch.load(config['depth_edge_model_ckpt'],
map_location=torch.device(device))
depth_edge_model.load_state_dict(depth_edge_weight)
depth_edge_model = depth_edge_model.to(device)
depth_edge_model.eval() # in eval mode
if not (config["load_ply"] is True and os.path.exists(mesh_fi)):
vis_photos, vis_depths = sparse_bilateral_filtering(
depth.copy(),
image.copy(),
config,
num_iter=config["sparse_iter"],
spdb=False,
) # do bilateral filtering
depth = vis_depths[-1]
model = None
torch.cuda.empty_cache()
print(f"Loading depth model at {time.time()}")
depth_feat_model = Inpaint_Depth_Net() # init depth inpainting model
depth_feat_weight = torch.load(config['depth_feat_model_ckpt'],
map_location=torch.device(device))
depth_feat_model.load_state_dict(depth_feat_weight, strict=True)
depth_feat_model = depth_feat_model.to(device)
depth_feat_model.eval()
depth_feat_model = depth_feat_model.to(device)
## MODEL INITS
print(f"Loading rgb model at {time.time()}") # init color inpainting model
rgb_model = Inpaint_Color_Net()
rgb_feat_weight = torch.load(config['rgb_feat_model_ckpt'],
map_location=torch.device(device))
rgb_model.load_state_dict(rgb_feat_weight)
rgb_model.eval()
rgb_model = rgb_model.to(device)
graph = None
print("Start Running 3D_Photo ...")
print(f"Loading edge model at {time.time()}")
depth_edge_model = Inpaint_Edge_Net(
init_weights=True
) # init edge inpainting model
depth_edge_weight = torch.load(
config["depth_edge_model_ckpt"], map_location="cuda:" + str(device)
)
depth_edge_model.load_state_dict(depth_edge_weight)
depth_edge_model = depth_edge_model.to(device)
depth_edge_model.eval() # in eval mode
print(f"Loading depth model at {time.time()}")
depth_feat_model = Inpaint_Depth_Net() # init depth inpainting model
depth_feat_weight = torch.load(
config["depth_feat_model_ckpt"], map_location=torch.device(device)
)
depth_feat_model.load_state_dict(depth_feat_weight, strict=True)
depth_feat_model = depth_feat_model.to(device)
depth_feat_model.eval()
depth_feat_model = depth_feat_model.to(device)
print(f"Writing depth ply (and basically doing everything) at {time.time()}")
# do some mesh work
starty=time.time()
rt_info = write_ply(image,
depth,
sample['int_mtx'],
mesh_fi,
config,
rgb_model,
depth_edge_model,
depth_edge_model,
depth_feat_model)
print(f"Loading rgb model at {time.time()}") # init color inpainting model
rgb_model = Inpaint_Color_Net()
rgb_feat_weight = torch.load(
config["rgb_feat_model_ckpt"], map_location=torch.device(device)
)
rgb_model.load_state_dict(rgb_feat_weight)
rgb_model.eval()
rgb_model = rgb_model.to(device)
graph = None
if rt_info is False:
continue
rgb_model = None
color_feat_model = None
depth_edge_model = None
depth_feat_model = None
torch.cuda.empty_cache()
print(f'Total Time taken: {time.time()-starty}')
if config['save_ply'] is True or config['load_ply'] is True:
verts, colors, faces, Height, Width, hFov, vFov = read_ply(mesh_fi) # read from whatever mesh thing has done
else:
verts, colors, faces, Height, Width, hFov, vFov = rt_info
print(
f"Writing depth ply (and basically doing everything) at {time.time()}"
)
# do some mesh work
starty = time.time()
rt_info = write_ply(
image,
depth,
sample["int_mtx"],
mesh_fi,
config,
rgb_model,
depth_edge_model,
depth_edge_model,
depth_feat_model,
)
startx = time.time()
print(f"Making video at {time.time()}")
videos_poses, video_basename = copy.deepcopy(sample['tgts_poses']), sample['tgt_name']
top = (config.get('original_h') // 2 - sample['int_mtx'][1, 2] * config['output_h'])
left = (config.get('original_w') // 2 - sample['int_mtx'][0, 2] * config['output_w'])
down, right = top + config['output_h'], left + config['output_w']
border = [int(xx) for xx in [top, down, left, right]]
normal_canvas, all_canvas = output_3d_photo(verts.copy(), colors.copy(), faces.copy(), copy.deepcopy(Height), copy.deepcopy(Width), copy.deepcopy(hFov), copy.deepcopy(vFov),
copy.deepcopy(sample['tgt_pose']), sample['video_postfix'], copy.deepcopy(sample['ref_pose']), copy.deepcopy(config['video_folder']),
image.copy(), copy.deepcopy(sample['int_mtx']), config, image,
videos_poses, video_basename, config.get('original_h'), config.get('original_w'), border=border, depth=depth, normal_canvas=normal_canvas, all_canvas=all_canvas,
mean_loc_depth=mean_loc_depth)
print(f"Total Time taken: {time.time()-startx}")
if rt_info is False:
continue
rgb_model = None
color_feat_model = None
depth_edge_model = None
depth_feat_model = None
torch.cuda.empty_cache()
print(f"Total Time taken: {time.time()-starty}")
if config["save_ply"] is True or config["load_ply"] is True:
verts, colors, faces, Height, Width, hFov, vFov = read_ply(
mesh_fi
) # read from whatever mesh thing has done
else:
verts, colors, faces, Height, Width, hFov, vFov = rt_info
startx = time.time()
print(f"Making video at {time.time()}")
videos_poses, video_basename = (
copy.deepcopy(sample["tgts_poses"]),
sample["tgt_name"],
)
top = (
config.get("original_h") // 2 - sample["int_mtx"][1, 2] * config["output_h"]
)
left = (
config.get("original_w") // 2 - sample["int_mtx"][0, 2] * config["output_w"]
)
down, right = top + config["output_h"], left + config["output_w"]
border = [int(xx) for xx in [top, down, left, right]]
normal_canvas, all_canvas = output_3d_photo(
verts.copy(),
colors.copy(),
faces.copy(),
copy.deepcopy(Height),
copy.deepcopy(Width),
copy.deepcopy(hFov),
copy.deepcopy(vFov),
copy.deepcopy(sample["tgt_pose"]),
sample["video_postfix"],
copy.deepcopy(sample["ref_pose"]),
copy.deepcopy(config["video_folder"]),
image.copy(),
copy.deepcopy(sample["int_mtx"]),
config,
image,
videos_poses,
video_basename,
config.get("original_h"),
config.get("original_w"),
border=border,
depth=depth,
normal_canvas=normal_canvas,
all_canvas=all_canvas,
mean_loc_depth=mean_loc_depth,
)
print(f"Total Time taken: {time.time()-startx}")

File diff suppressed because it is too large.

@@ -1,7 +1,7 @@
import os
import numpy as np
try:
import cynetworkx as netx
import networkx as netx
except ImportError:
import networkx as netx

@@ -1,7 +1,7 @@
opencv-python==4.2.0.32
vispy==0.6.4
opencv-python~=4.2
vispy
moviepy==1.0.2
transforms3d==0.3.1
networkx==2.3
cynetworkx
networkx
scikit-image

File diff suppressed because it is too large.

@@ -1,6 +1,14 @@
# 3D-Photography: Image Inpainting | Depth-Estimation
<p>Sensing Depth from 2D Images and Inpainting Background behind the Foreground objects to create 3D Photos with Parallax Animation.</p>
### it may even work now
you can get real cheeky and try to get this to work in 2023; i had success with a python 3.10 venv after my modifications. check `getmodels.sh` as well
#### example
`python3 3dphotography.py /path/to/images`
##### you're welcome, from yung innanet
---
## Depth Estimation by Tiefenrausch and PyDNet respectively.
@@ -28,4 +36,4 @@
volume = {39},
number = {4},
year = {2020}
}
}

getmodels.sh (new executable file), 16 changes

@@ -0,0 +1,16 @@
#!/usr/bin/env bash
set -e
mkdir -p Inpainting/checkpoints
wget https://filebox.ece.vt.edu/~jbhuang/project/3DPhoto/model/color-model.pth
mv color-model.pth Inpainting/checkpoints/
wget https://filebox.ece.vt.edu/~jbhuang/project/3DPhoto/model/depth-model.pth
mv depth-model.pth Inpainting/checkpoints/
wget https://filebox.ece.vt.edu/~jbhuang/project/3DPhoto/model/edge-model.pth
mv edge-model.pth Inpainting/checkpoints/
wget https://filebox.ece.vt.edu/~jbhuang/project/3DPhoto/model/model.pt
mv model.pt Inpainting/MiDaS/
echo "congrats it might even work now maybe :^)"

requirements.txt (new symbolic link), 1 change

@@ -0,0 +1 @@
Inpainting/requirements.txt