Skip to content

Commit f938db8

Browse files
committed
Revert "[torchcodec] Improve benchmark to test all combinations"
This reverts commit 1025e82.
1 parent f267591 commit f938db8

File tree

1 file changed

+41
-79
lines changed

1 file changed

+41
-79
lines changed

benchmarks/decoders/gpu_benchmark.py

Lines changed: 41 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -7,57 +7,31 @@
77
import torchcodec
88
from torchvision.transforms import Resize
99

10-
RESIZED_WIDTH = 256
11-
RESIZED_HEIGHT = 256
1210

13-
14-
def transfer_and_resize_frame(frame, resize_device_string):
11+
def transfer_and_resize_frame(frame):
1512
# This should be a no-op if the frame is already on the GPU.
16-
frame = frame.to(resize_device_string)
17-
frame = Resize((RESIZED_HEIGHT, RESIZED_WIDTH))(frame)
13+
frame = frame.to("cuda:0")
14+
frame = Resize((256, 256))(frame)
1815
return frame
1916

2017

21-
def decode_full_video(video_path, decode_device_string, resize_device_string):
22-
# We use the core API instead of SimpleVideoDecoder because the core API
23-
# allows us to natively resize as part of the decode step.
24-
print(f"{decode_device_string=} {resize_device_string=}")
25-
decoder = torchcodec.decoders._core.create_from_file(video_path)
26-
num_threads = None
27-
if "cuda" in decode_device_string:
28-
num_threads = 1
29-
width = None
30-
height = None
31-
if "native" in resize_device_string:
32-
width = RESIZED_WIDTH
33-
height = RESIZED_HEIGHT
34-
torchcodec.decoders._core.add_video_stream(
35-
decoder,
36-
stream_index=-1,
37-
device_string=decode_device_string,
38-
num_threads=num_threads,
39-
width=width,
40-
height=height,
18+
def decode_full_video(video_path, device_string, do_gpu_preproc):
19+
decoder = torchcodec.decoders.SimpleVideoDecoder(
20+
video_path, device=torch.device(device_string)
4121
)
42-
4322
start_time = time.time()
4423
frame_count = 0
45-
while True:
46-
try:
47-
frame, *_ = torchcodec.decoders._core.get_next_frame(decoder)
48-
if resize_device_string != "none" and "native" not in resize_device_string:
49-
frame = transfer_and_resize_frame(frame, resize_device_string)
50-
51-
frame_count += 1
52-
except Exception as e:
53-
print("EXCEPTION", e)
54-
break
55-
24+
for frame in decoder:
25+
# A resize can be applied to simulate extra preproc work that happens
26+
# on the GPU; it runs only when do_gpu_preproc is set:
27+
if do_gpu_preproc:
28+
frame = transfer_and_resize_frame(frame)
29+
frame_count += 1
5630
end_time = time.time()
5731
elapsed = end_time - start_time
5832
fps = frame_count / (end_time - start_time)
5933
print(
60-
f"****** DECODED full video {decode_device_string=} {frame_count=} {elapsed=} {fps=}"
34+
f"****** DECODED full video {device_string=} {frame_count=} {elapsed=} {fps=}"
6135
)
6236
return frame_count, end_time - start_time
6337

@@ -70,12 +44,6 @@ def main():
7044
type=str,
7145
help="Comma-separated devices to test decoding on.",
7246
)
73-
parser.add_argument(
74-
"--resize_devices",
75-
default="cuda:0,cpu,native,none",
76-
type=str,
77-
help="Comma-separated devices to test preroc (resize) on. Use 'none' to specify no resize.",
78-
)
7947
parser.add_argument(
8048
"--video",
8149
type=str,
@@ -91,6 +59,15 @@ def main():
9159
"to measure the cold start time."
9260
),
9361
)
62+
parser.add_argument(
63+
"--do_gpu_preproc",
64+
action=argparse.BooleanOptionalAction,
65+
default=True,
66+
help=(
67+
"Do a transfer to GPU and resize operation after the decode to "
68+
"simulate a real-world transform."
69+
),
70+
)
9471
args = parser.parse_args()
9572
video_path = args.video
9673

@@ -100,44 +77,29 @@ def main():
10077
decode_full_video(video_path, device)
10178
return
10279

103-
resize_devices = args.resize_devices.split(",")
104-
resize_devices = [d for d in resize_devices if d != ""]
105-
if len(resize_devices) == 0:
106-
resize_devices.append("none")
107-
108-
label = "Decode+Resize Time"
80+
label = "Decode"
81+
if args.do_gpu_preproc:
82+
label += " + GPU Preproc"
83+
label += " Time"
10984

11085
results = []
111-
for decode_device_string in args.devices.split(","):
112-
for resize_device_string in resize_devices:
113-
decode_label = decode_device_string
114-
if "cuda" in decode_label:
115-
# Shorten "cuda:0" to "cuda"
116-
decode_label = "cuda"
117-
resize_label = resize_device_string
118-
if "cuda" in resize_device_string:
119-
# Shorten "cuda:0" to "cuda"
120-
resize_label = "cuda"
121-
print("decode_device", decode_device_string)
122-
print("resize_device", resize_device_string)
123-
t = benchmark.Timer(
124-
stmt="decode_full_video(video_path, decode_device_string, resize_device_string)",
125-
globals={
126-
"decode_device_string": decode_device_string,
127-
"video_path": video_path,
128-
"decode_full_video": decode_full_video,
129-
"resize_device_string": resize_device_string,
130-
},
131-
label=label,
132-
sub_label=f"video={os.path.basename(video_path)}",
133-
description=f"D={decode_label},R={resize_label}",
134-
).blocked_autorange()
135-
results.append(t)
86+
for device in args.devices.split(","):
87+
print("device", device)
88+
t = benchmark.Timer(
89+
stmt="decode_full_video(video_path, device, do_gpu_preproc)",
90+
globals={
91+
"device": device,
92+
"video_path": video_path,
93+
"decode_full_video": decode_full_video,
94+
"do_gpu_preproc": args.do_gpu_preproc,
95+
},
96+
label=label,
97+
sub_label=f"video={os.path.basename(video_path)}",
98+
description=f"decode_device={device}",
99+
).blocked_autorange()
100+
results.append(t)
136101
compare = benchmark.Compare(results)
137102
compare.print()
138-
print("Key: D=Decode, R=Resize")
139-
print("Native resize is done as part of the decode step")
140-
print("none resize means there is no resize step -- native or otherwise")
141103

142104

143105
if __name__ == "__main__":

0 commit comments

Comments
 (0)