FinalCode.py
import time
import datetime
#initial = time.time()
import matplotlib
matplotlib.use('Agg')  # non-interactive backend; no display required
import os
import sys
import yaml
from argparse import ArgumentParser
from tqdm import tqdm
#from gtts import gTTS
import pyttsx3
import sda
import imageio
import ffmpeg
import numpy as np
from skimage.transform import resize
from skimage import img_as_ubyte
import torch
from sync_batchnorm import DataParallelWithCallback
from modules.generator import OcclusionAwareGenerator
from modules.keypoint_detector import KPDetector
from animate import normalize_kp
from scipy.spatial import ConvexHull
from gTTS import synthesize_text
import warnings
warnings.filterwarnings("ignore")
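
# Pipeline overview (see __main__ below): tts() synthesizes speech from a text
# file, facial_animation() animates a base face with that audio via sda, and
# fomm() maps the animation onto the user's source image with the First Order
# Motion Model, muxing the audio back in with ffmpeg.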
#Loading the checkpoints and the config files
def load_checkpoints(config_path, checkpoint_path, cpu=True):
    with open(config_path) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    generator = OcclusionAwareGenerator(**config['model_params']['generator_params'],
                                        **config['model_params']['common_params'])
    if not cpu:
        generator.cuda()
    kp_detector = KPDetector(**config['model_params']['kp_detector_params'],
                             **config['model_params']['common_params'])
    if not cpu:
        kp_detector.cuda()
    if cpu:
        checkpoint = torch.load(checkpoint_path, map_location=torch.device('cpu'))
    else:
        checkpoint = torch.load(checkpoint_path)
    generator.load_state_dict(checkpoint['generator'])
    kp_detector.load_state_dict(checkpoint['kp_detector'])
    if not cpu:
        generator = DataParallelWithCallback(generator)
        kp_detector = DataParallelWithCallback(kp_detector)
    # Switch both networks to inference mode
    generator.eval()
    kp_detector.eval()
    return generator, kp_detector
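
# Illustrative call (config and checkpoint paths taken from fomm() below):
# generator, kp_detector = load_checkpoints(config_path='config/vox-256.yaml',
#                                           checkpoint_path='checkpoint/vox-cpk.pth.tar',
#                                           cpu=True)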
#Making Animations
def make_animation(source_image, driving_video, generator, kp_detector, relative=True, adapt_movement_scale=True, cpu=True):
    with torch.no_grad():
        predictions = []
        # (H, W, 3) -> (1, 3, H, W)
        source = torch.tensor(source_image[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
        if not cpu:
            source = source.cuda()
        # (T, H, W, 3) -> (1, 3, T, H, W)
        driving = torch.tensor(np.array(driving_video)[np.newaxis].astype(np.float32)).permute(0, 4, 1, 2, 3)
        kp_source = kp_detector(source)
        kp_driving_initial = kp_detector(driving[:, :, 0])
        for frame_idx in tqdm(range(driving.shape[2])):
            driving_frame = driving[:, :, frame_idx]
            if not cpu:
                driving_frame = driving_frame.cuda()
            kp_driving = kp_detector(driving_frame)
            # Normalize keypoints relative to the first driving frame
            kp_norm = normalize_kp(kp_source=kp_source, kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial, use_relative_movement=relative,
                                   use_relative_jacobian=relative, adapt_movement_scale=adapt_movement_scale)
            out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
            predictions.append(np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0])
    return predictions
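
# Illustrative call, mirroring how fomm() uses it below: frames are float
# arrays in [0, 1], resized to 256x256 with 3 channels.
# predictions = make_animation(source_image, driving_video, generator, kp_detector,
#                              relative=True, adapt_movement_scale=False, cpu=True)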
def tts(opt):
    # Read the input text and synthesize speech; "B" and "A" select the
    # male and female voices respectively.
    with open("C:/Users/Moiz/Desktop/Jubi Work Local/Project - Final/inputs/test.txt", "r") as f:
        text = f.read()
    if str(opt.voice).lower() == "male":
        synthesize_text(text, "B")
    else:
        synthesize_text(text, "A")
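
# synthesize_text comes from the local gTTS helper module; judging from
# facial_animation() below, it is expected to write its output to
# inputs/audio.wav (an assumption about the helper, not confirmed here).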
def facial_animation():
    va = sda.VideoAnimator(gpu=0, model_path="timit")  # Instantiate the animator
    vid, aud = va("config/image.bmp", "inputs/audio.wav")
    va.save_video(vid, aud, "output/generated.mp4")
    os.remove('inputs/audio.wav')
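
# sda here is the speech-driven animation package; model_path="timit" selects
# its pretrained TIMIT model (an assumption based on that library's documented
# usage). The animator turns the base face plus the synthesized audio into a
# talking-head clip at output/generated.mp4.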
def fomm(opt):
    source_image = imageio.imread(opt.source_image)
    config = 'config/vox-256.yaml'
    checkpoint = 'checkpoint/vox-cpk.pth.tar'
    result_video = 'output/intermediate.mp4'
    source_video = "output/generated.mp4"
    reader = imageio.get_reader(source_video)
    # Keep the audio stream of the driving video so it can be re-attached
    # to the silent output of the generator.
    inputStream = ffmpeg.input(source_video)
    audioStream = inputStream.audio
    fps = reader.get_meta_data()['fps']
    driving_video = []
    try:
        for im in reader:
            driving_video.append(im)
    except RuntimeError:
        pass
    reader.close()
    source_image = resize(source_image, (256, 256))[..., :3]
    driving_video = [resize(frame, (256, 256))[..., :3] for frame in driving_video]
    generator, kp_detector = load_checkpoints(config_path=config, checkpoint_path=checkpoint, cpu=True)
    #loading = time.time()
    predictions = make_animation(source_image, driving_video, generator, kp_detector, relative=True, adapt_movement_scale=False, cpu=True)
    #predict = time.time()
    imageio.mimsave(result_video, [img_as_ubyte(frame) for frame in predictions], fps=fps)
    # Mux the preserved audio with the newly generated video.
    videoStream = ffmpeg.input(result_video)
    currenttime = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M')
    out = ffmpeg.output(audioStream, videoStream, 'output/final_' + currenttime + '.mp4')
    ffmpeg.run(out)
    os.remove('output/intermediate.mp4')
    os.remove('output/generated.mp4')
    #end = time.time()
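
# Example invocation (defaults shown; both flags are optional):
# python FinalCode.py --source_image inputs/new_face_1.jpeg --voice male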
#Main function
if __name__ == "__main__":
    #start = time.time()
    parser = ArgumentParser()
    parser.add_argument("--source_image", default='inputs/new_face_1.jpeg', help="path to source image")
    parser.add_argument("--voice", default='male', help="choose voice gender (male/female)")
    opt = parser.parse_args()
    tts(opt)
    print("Text to Speech Complete - Audio Saved")
    facial_animation()
    print("Facial Animation Creation Complete")
    print("Morphing created Facial Animations on input image.......")
    fomm(opt)
    print("Process Complete, Video Ready!")
    # print("======================Time Statistics======================")
    # print("Importing Libraries and Modules: " + str(round(start-initial,2)) + "s")
    # print("Preprocessing and Loading Checkpoint: " + str(round(loading-start,2)) + "s")
    # print("Predictions: " + str(round(predict-loading,2)) + "s")
    # print("Video Editing: " + str(round(end-predict,2)) + "s")
    # print("Total Time: " + str(round(end-initial,2)) + "s")