 import torchcodec
 from torchvision.transforms import Resize
 
-RESIZED_WIDTH = 256
-RESIZED_HEIGHT = 256
 
-
-def transfer_and_resize_frame(frame, resize_device_string):
+def transfer_and_resize_frame(frame):
     # This should be a no-op if the frame is already on the GPU.
-    frame = frame.to(resize_device_string)
-    frame = Resize((RESIZED_HEIGHT, RESIZED_WIDTH))(frame)
+    frame = frame.to("cuda:0")
+    frame = Resize((256, 256))(frame)
     return frame
 
 
-def decode_full_video(video_path, decode_device_string, resize_device_string):
-    # We use the core API instead of SimpleVideoDecoder because the core API
-    # allows us to natively resize as part of the decode step.
-    print(f"{decode_device_string=} {resize_device_string=}")
-    decoder = torchcodec.decoders._core.create_from_file(video_path)
-    num_threads = None
-    if "cuda" in decode_device_string:
-        num_threads = 1
-    width = None
-    height = None
-    if "native" in resize_device_string:
-        width = RESIZED_WIDTH
-        height = RESIZED_HEIGHT
-    torchcodec.decoders._core.add_video_stream(
-        decoder,
-        stream_index=-1,
-        device_string=decode_device_string,
-        num_threads=num_threads,
-        width=width,
-        height=height,
+def decode_full_video(video_path, device_string, do_gpu_preproc):
+    decoder = torchcodec.decoders.SimpleVideoDecoder(
+        video_path, device=torch.device(device_string)
     )
-
     start_time = time.time()
     frame_count = 0
-    while True:
-        try:
-            frame, *_ = torchcodec.decoders._core.get_next_frame(decoder)
-            if resize_device_string != "none" and "native" not in resize_device_string:
-                frame = transfer_and_resize_frame(frame, resize_device_string)
-
-            frame_count += 1
-        except Exception as e:
-            print("EXCEPTION", e)
-            break
-
+    for frame in decoder:
+        # Optionally do a resize to simulate extra preproc work that
+        # happens on the GPU (controlled by the do_gpu_preproc flag):
+        if do_gpu_preproc:
+            frame = transfer_and_resize_frame(frame)
+        frame_count += 1
     end_time = time.time()
     elapsed = end_time - start_time
     fps = frame_count / (end_time - start_time)
     print(
-        f"****** DECODED full video {decode_device_string=} {frame_count=} {elapsed=} {fps=}"
+        f"****** DECODED full video {device_string=} {frame_count=} {elapsed=} {fps=}"
     )
     return frame_count, end_time - start_time
 
@@ -70,12 +44,6 @@ def main():
         type=str,
         help="Comma-separated devices to test decoding on.",
     )
-    parser.add_argument(
-        "--resize_devices",
-        default="cuda:0,cpu,native,none",
-        type=str,
-        help="Comma-separated devices to test preroc (resize) on. Use 'none' to specify no resize.",
-    )
     parser.add_argument(
         "--video",
         type=str,
@@ -91,6 +59,15 @@ def main():
             "to measure the cold start time."
         ),
     )
+    parser.add_argument(
+        "--do_gpu_preproc",
+        action=argparse.BooleanOptionalAction,
+        default=True,
+        help=(
+            "Do a transfer to GPU and resize operation after the decode to "
+            "simulate a real-world transform."
+        ),
+    )
     args = parser.parse_args()
     video_path = args.video
 
@@ -100,44 +77,29 @@ def main():
         decode_full_video(video_path, device)
         return
 
-    resize_devices = args.resize_devices.split(",")
-    resize_devices = [d for d in resize_devices if d != ""]
-    if len(resize_devices) == 0:
-        resize_devices.append("none")
-
-    label = "Decode+Resize Time"
+    label = "Decode"
+    if args.do_gpu_preproc:
+        label += " + GPU Preproc"
+    label += " Time"
 
     results = []
-    for decode_device_string in args.devices.split(","):
-        for resize_device_string in resize_devices:
-            decode_label = decode_device_string
-            if "cuda" in decode_label:
-                # Shorten "cuda:0" to "cuda"
-                decode_label = "cuda"
-            resize_label = resize_device_string
-            if "cuda" in resize_device_string:
-                # Shorten "cuda:0" to "cuda"
-                resize_label = "cuda"
-            print("decode_device", decode_device_string)
-            print("resize_device", resize_device_string)
-            t = benchmark.Timer(
-                stmt="decode_full_video(video_path, decode_device_string, resize_device_string)",
-                globals={
-                    "decode_device_string": decode_device_string,
-                    "video_path": video_path,
-                    "decode_full_video": decode_full_video,
-                    "resize_device_string": resize_device_string,
-                },
-                label=label,
-                sub_label=f"video={os.path.basename(video_path)}",
-                description=f"D={decode_label},R={resize_label}",
-            ).blocked_autorange()
-            results.append(t)
+    for device in args.devices.split(","):
+        print("device", device)
+        t = benchmark.Timer(
+            stmt="decode_full_video(video_path, device, do_gpu_preproc)",
+            globals={
+                "device": device,
+                "video_path": video_path,
+                "decode_full_video": decode_full_video,
+                "do_gpu_preproc": args.do_gpu_preproc,
+            },
+            label=label,
+            sub_label=f"video={os.path.basename(video_path)}",
+            description=f"decode_device={device}",
+        ).blocked_autorange()
+        results.append(t)
     compare = benchmark.Compare(results)
     compare.print()
-    print("Key: D=Decode, R=Resize")
-    print("Native resize is done as part of the decode step")
-    print("none resize means there is no resize step -- native or otherwise")
 
 
 if __name__ == "__main__":
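
Two notes for reviewers. First, the unchanged cold-start call decode_full_video(video_path, device), which appears as context in the last hunk, now passes two arguments to a function whose new signature requires three, so it will likely need either a default value for do_gpu_preproc or an updated call site. Second, below is a minimal, self-contained sketch distilling what the changed script measures per device; the video path, device list, and hardcoded do_gpu_preproc=True are placeholders, and it assumes torch, torchcodec, and torchvision are installed, so treat it as an illustration of the new flow rather than the script itself.

import time

import torch
import torchcodec
from torch.utils import benchmark
from torchvision.transforms import Resize


def decode_full_video(video_path, device_string, do_gpu_preproc):
    # Decode every frame through the public SimpleVideoDecoder API.
    decoder = torchcodec.decoders.SimpleVideoDecoder(
        video_path, device=torch.device(device_string)
    )
    start = time.time()
    frame_count = 0
    for frame in decoder:
        if do_gpu_preproc:
            # Simulate a real-world transform: transfer to GPU (a no-op if
            # the frame is already there) and resize to 256x256.
            frame = Resize((256, 256))(frame.to("cuda:0"))
        frame_count += 1
    return frame_count, time.time() - start


results = []
for device in ["cpu", "cuda:0"]:  # placeholder device list
    t = benchmark.Timer(
        stmt="decode_full_video(video_path, device, True)",
        globals={
            "decode_full_video": decode_full_video,
            "video_path": "/path/to/video.mp4",  # placeholder path
            "device": device,
        },
        label="Decode + GPU Preproc Time",
        description=f"decode_device={device}",
    ).blocked_autorange()
    results.append(t)

benchmark.Compare(results).print()

Against the actual script, the equivalent run would be something like passing --devices cpu,cuda:0 --video <path> and toggling --do_gpu_preproc or --no-do_gpu_preproc (the latter pair is what argparse.BooleanOptionalAction generates).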