Skip to content

Commit 0a06c3d

Browse files
authored
Use our own index to seek more accurately when it is available (#180)
1 parent b0bc30d commit 0a06c3d

File tree

4 files changed

+15
-6
lines changed

4 files changed

+15
-6
lines changed

src/torchcodec/decoders/_core/VideoDecoder.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -732,6 +732,18 @@ void VideoDecoder::maybeSeekToBeforeDesiredPts() {
732732
int firstActiveStreamIndex = *activeStreamIndices_.begin();
733733
const auto& firstStreamInfo = streams_[firstActiveStreamIndex];
734734
int64_t desiredPts = *maybeDesiredPts_ * firstStreamInfo.timeBase.den;
735+
736+
// For some encodings like H265, FFMPEG sometimes seeks past the point we
737+
// set as the max_ts. So we use our own index to give it the exact pts of
738+
// the key frame that we want to seek to.
739+
// See https://github.com/pytorch/torchcodec/issues/179 for more details.
740+
// See https://trac.ffmpeg.org/ticket/11137 for the underlying ffmpeg bug.
741+
if (!firstStreamInfo.keyFrames.empty()) {
742+
int desiredKeyFrameIndex =
743+
getKeyFrameIndexForPts(firstStreamInfo, desiredPts);
744+
desiredPts = firstStreamInfo.keyFrames[desiredKeyFrameIndex].pts;
745+
}
746+
735747
int ffmepgStatus = avformat_seek_file(
736748
formatContext_.get(),
737749
firstStreamInfo.streamIndex,

test/decoders/test_simple_video_decoder.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -321,12 +321,9 @@ def test_get_frame_displayed_at(self):
321321
assert isinstance(decoder.get_frame_displayed_at(6.02).duration_seconds, float)
322322

323323
def test_get_frame_displayed_at_h265(self):
324+
# Non-regression test for https://github.com/pytorch/torchcodec/issues/179
324325
decoder = SimpleVideoDecoder(H265_VIDEO.path)
325-
# Note that for H265, FFMPEG's seeking is not precise. Even though we ask to
326-
# seek with a max_ts=0.5, FFMPEG will seek beyond that point.
327-
# TODO: Revert to using frame5 in the test below once it's fixed upstream:
328-
# https://trac.ffmpeg.org/ticket/11137
329-
ref_frame6 = H265_VIDEO.get_frame_by_name("frame000006")
326+
ref_frame6 = H265_VIDEO.get_frame_by_name("frame000005")
330327
assert_tensor_equal(ref_frame6, decoder.get_frame_displayed_at(0.5).data)
331328

332329
def test_get_frame_displayed_at_fails(self):

test/generate_reference_resources.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ ffmpeg -y -i "$VIDEO_PATH" -b:a 192K -vn "$VIDEO_PATH.audio.mp3"
4747
# ./configure --enable-nonfree --enable-gpl --prefix=$(readlink -f ../bin) --enable-libx265 --enable-rpath --extra-ldflags=-Wl,-rpath=$CONDA_PREFIX/lib --enable-filter=drawtext --enable-libfontconfig --enable-libfreetype --enable-libharfbuzz
4848
# ffmpeg -f lavfi -i color=size=128x128:duration=1:rate=10:color=blue -vf "drawtext=fontsize=30:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2:text='Frame %{frame_num}'" -vcodec libx265 -pix_fmt yuv420p -g 2 -crf 10 h265_video.mp4 -y
4949
VIDEO_PATH=$RESOURCES_DIR/h265_video.mp4
50-
FRAMES=(6)
50+
FRAMES=(5)
5151
for frame in "${FRAMES[@]}"; do
5252
frame_name=$(printf "%06d" "$frame")
5353
ffmpeg -y -i "$VIDEO_PATH" -vf select="eq(n\,$frame)" -vsync vfr -q:v 2 "$VIDEO_PATH.frame$frame_name.bmp"

0 commit comments

Comments
 (0)