From 383186912aeb36455d7be3e37ef228efd8aa08ad Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Tue, 11 Feb 2025 18:07:47 +0700 Subject: [PATCH] feat: sync GPX with first GPS timestamp when available (#706) * feat: sync GPX with first GPS timestamp when available * add logging * fix negative timestamps --- mapillary_tools/camm/camm_builder.py | 28 ++++------ mapillary_tools/camm/camm_parser.py | 10 +--- .../geotag_videos_from_exiftool_video.py | 4 +- .../geotag/geotag_videos_from_video.py | 4 +- .../extract_video_data.py | 18 +----- .../extractors/base_parser.py | 16 ++++-- .../extractors/exiftool_xml_parser.py | 5 +- .../extractors/gpx_parser.py | 55 ++++++++++++++++--- 8 files changed, 78 insertions(+), 62 deletions(-) diff --git a/mapillary_tools/camm/camm_builder.py b/mapillary_tools/camm/camm_builder.py index d22c624a..83e4507c 100644 --- a/mapillary_tools/camm/camm_builder.py +++ b/mapillary_tools/camm/camm_builder.py @@ -76,18 +76,9 @@ def _create_edit_list_from_points( ) -> builder.BoxDict: entries: T.List[T.Dict] = [] - for idx, points in enumerate(point_segments): - if not points: - entries = [ - { - "media_time": 0, - "segment_duration": 0, - "media_rate_integer": 1, - "media_rate_fraction": 0, - } - ] - break + non_empty_point_segments = [points for points in point_segments if points] + for idx, points in enumerate(non_empty_point_segments): assert 0 <= points[0].time, ( f"expect non-negative point time but got {points[0]}" ) @@ -98,8 +89,10 @@ def _create_edit_list_from_points( if idx == 0: if 0 < points[0].time: segment_duration = int(points[0].time * movie_timescale) + # put an empty edit list entry to skip the initial gap entries.append( { + # If this field is set to –1, it is an empty edit "media_time": -1, "segment_duration": segment_duration, "media_rate_integer": 1, @@ -107,7 +100,6 @@ def _create_edit_list_from_points( } ) else: - assert point_segments[-1][-1].time <= points[0].time media_time = int(points[0].time * media_timescale) segment_duration = int((points[-1].time - points[0].time) * movie_timescale) entries.append( @@ -300,14 +292,18 @@ def _f( movie_timescale = builder.find_movie_timescale(moov_children) # make sure the precision of timedeltas not lower than 0.001 (1ms) media_timescale = max(1000, movie_timescale) - measurements = _multiplex(video_metadata.points, telemetry_measurements) + + # points with negative time are skipped + # TODO: interpolate first point at time == 0 + # TODO: measurements with negative times should be skipped too + points = [point for point in video_metadata.points if point.time >= 0] + + measurements = _multiplex(points, telemetry_measurements) camm_samples = list( convert_telemetry_to_raw_samples(measurements, media_timescale) ) camm_trak = create_camm_trak(camm_samples, media_timescale) - elst = _create_edit_list_from_points( - [video_metadata.points], movie_timescale, media_timescale - ) + elst = _create_edit_list_from_points([points], movie_timescale, media_timescale) if T.cast(T.Dict, elst["data"])["entries"]: T.cast(T.List[builder.BoxDict], camm_trak["data"]).append( { diff --git a/mapillary_tools/camm/camm_parser.py b/mapillary_tools/camm/camm_parser.py index a2271f99..508d73b0 100644 --- a/mapillary_tools/camm/camm_parser.py +++ b/mapillary_tools/camm/camm_parser.py @@ -146,10 +146,7 @@ def _filter_telemetry_by_elst_segments( if not elst: for m in measurements: - if dataclasses.is_dataclass(m): - yield dataclasses.replace(m, time=m.time + offset) - else: - m._replace(time=m.time + offset) + yield dataclasses.replace(m, time=m.time + offset) return elst.sort(key=lambda entry: entry[0]) @@ -161,10 +158,7 @@ def _filter_telemetry_by_elst_segments( if m.time < media_time: pass elif m.time <= media_time + duration: - if dataclasses.is_dataclass(m): - yield dataclasses.replace(m, time=m.time + offset) - else: - m._replace(time=m.time + offset) + yield dataclasses.replace(m, time=m.time + offset) else: elst_idx += 1 diff --git a/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py b/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py index 8f67aef8..c03f0c07 100644 --- a/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +++ b/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py @@ -4,11 +4,9 @@ from multiprocessing import Pool from pathlib import Path -from mapillary_tools import utils - from tqdm import tqdm -from .. import exceptions, exiftool_read, geo, types +from .. import exceptions, exiftool_read, geo, types, utils from ..exiftool_read_video import ExifToolReadVideo from ..telemetry import GPSPoint from . import gpmf_gps_filter, utils as video_utils diff --git a/mapillary_tools/geotag/geotag_videos_from_video.py b/mapillary_tools/geotag/geotag_videos_from_video.py index 7374ed96..833b5bc5 100644 --- a/mapillary_tools/geotag/geotag_videos_from_video.py +++ b/mapillary_tools/geotag/geotag_videos_from_video.py @@ -4,11 +4,9 @@ from multiprocessing import Pool from pathlib import Path -from mapillary_tools import utils - from tqdm import tqdm -from .. import exceptions, geo, types +from .. import exceptions, geo, types, utils from ..camm import camm_parser from ..mp4 import simple_mp4_parser as sparser from ..telemetry import GPSPoint diff --git a/mapillary_tools/video_data_extraction/extract_video_data.py b/mapillary_tools/video_data_extraction/extract_video_data.py index 442a4bfc..1d90c400 100644 --- a/mapillary_tools/video_data_extraction/extract_video_data.py +++ b/mapillary_tools/video_data_extraction/extract_video_data.py @@ -122,12 +122,7 @@ def _extract_points( {**log_vars, "points": len(points)}, ) - points = self._sanitize_points(points) - - if parser.must_rebase_times_to_zero: - points = self._rebase_times(points) - - return points + return self._sanitize_points(points) @staticmethod def _check_paths(import_paths: T.Sequence[Path]): @@ -179,14 +174,3 @@ def _sanitize_points(points: T.Sequence[geo.Point]) -> T.Sequence[geo.Point]: raise exceptions.MapillaryStationaryVideoError("Stationary video") return points - - @staticmethod - def _rebase_times(points: T.Sequence[geo.Point]): - """ - Make point times start from 0 - """ - if points: - first_timestamp = points[0].time - for p in points: - p.time = p.time - first_timestamp - return points diff --git a/mapillary_tools/video_data_extraction/extractors/base_parser.py b/mapillary_tools/video_data_extraction/extractors/base_parser.py index f50725b4..bd715f5a 100644 --- a/mapillary_tools/video_data_extraction/extractors/base_parser.py +++ b/mapillary_tools/video_data_extraction/extractors/base_parser.py @@ -33,11 +33,6 @@ def default_source_pattern(self) -> str: def parser_label(self) -> str: raise NotImplementedError - @property - @abc.abstractmethod - def must_rebase_times_to_zero(self) -> bool: - raise NotImplementedError - @abc.abstractmethod def extract_points(self) -> T.Sequence[geo.Point]: raise NotImplementedError @@ -67,3 +62,14 @@ def geotag_source_path(self) -> T.Optional[Path]: ).resolve() return abs_path if abs_path.is_file() else None + + @staticmethod + def _rebase_times(points: T.Sequence[geo.Point], offset: float = 0.0): + """ + Make point times start from 0 + """ + if points: + first_timestamp = points[0].time + for p in points: + p.time = (p.time - first_timestamp) + offset + return points diff --git a/mapillary_tools/video_data_extraction/extractors/exiftool_xml_parser.py b/mapillary_tools/video_data_extraction/extractors/exiftool_xml_parser.py index 2b17b53e..4cf91ea1 100644 --- a/mapillary_tools/video_data_extraction/extractors/exiftool_xml_parser.py +++ b/mapillary_tools/video_data_extraction/extractors/exiftool_xml_parser.py @@ -13,7 +13,6 @@ class ExiftoolXmlParser(BaseParser): default_source_pattern = "%g.xml" - must_rebase_times_to_zero = True parser_label = "exiftool_xml" exifToolReadVideo: T.Optional[ExifToolReadVideo] = None @@ -39,9 +38,11 @@ def __init__( self.exifToolReadVideo = ExifToolReadVideo(ET.ElementTree(element)) def extract_points(self) -> T.Sequence[geo.Point]: - return ( + gps_points = ( self.exifToolReadVideo.extract_gps_track() if self.exifToolReadVideo else [] ) + self._rebase_times(gps_points) + return gps_points def extract_make(self) -> T.Optional[str]: return self.exifToolReadVideo.extract_make() if self.exifToolReadVideo else None diff --git a/mapillary_tools/video_data_extraction/extractors/gpx_parser.py b/mapillary_tools/video_data_extraction/extractors/gpx_parser.py index c37bee66..c864ad38 100644 --- a/mapillary_tools/video_data_extraction/extractors/gpx_parser.py +++ b/mapillary_tools/video_data_extraction/extractors/gpx_parser.py @@ -1,27 +1,66 @@ +import datetime +import logging import typing as T -from ... import geo +from ... import geo, telemetry from ...geotag import geotag_images_from_gpx_file from .base_parser import BaseParser from .generic_video_parser import GenericVideoParser +LOG = logging.getLogger(__name__) + + class GpxParser(BaseParser): default_source_pattern = "%g.gpx" - must_rebase_times_to_zero = True parser_label = "gpx" def extract_points(self) -> T.Sequence[geo.Point]: path = self.geotag_source_path if not path: return [] - try: - tracks = geotag_images_from_gpx_file.parse_gpx(path) - except Exception: - return [] - points: T.Sequence[geo.Point] = sum(tracks, []) - return points + gpx_tracks = geotag_images_from_gpx_file.parse_gpx(path) + if 1 < len(gpx_tracks): + LOG.warning( + "Found %s tracks in the GPX file %s. Will merge points in all the tracks as a single track for interpolation", + len(gpx_tracks), + self.videoPath, + ) + + gpx_points: T.Sequence[geo.Point] = sum(gpx_tracks, []) + if not gpx_points: + return gpx_points + + first_gpx_dt = datetime.datetime.fromtimestamp( + gpx_points[0].time, tz=datetime.timezone.utc + ) + LOG.info("First GPX timestamp: %s", first_gpx_dt) + + # Extract first GPS timestamp (if found) for synchronization + offset: float = 0.0 + parser = GenericVideoParser(self.videoPath, self.options, self.parserOptions) + gps_points = parser.extract_points() + if gps_points: + first_gps_point = gps_points[0] + if isinstance(first_gps_point, telemetry.GPSPoint): + if first_gps_point.epoch_time is not None: + first_gps_dt = datetime.datetime.fromtimestamp( + first_gps_point.epoch_time, tz=datetime.timezone.utc + ) + LOG.info("First GPS timestamp: %s", first_gps_dt) + offset = gpx_points[0].time - first_gps_point.epoch_time + if offset: + LOG.warning( + "Found offset between GPX %s and video GPS timestamps %s: %s seconds", + first_gpx_dt, + first_gps_dt, + offset, + ) + + self._rebase_times(gpx_points, offset=offset) + + return gpx_points def extract_make(self) -> T.Optional[str]: parser = GenericVideoParser(self.videoPath, self.options, self.parserOptions)