Skip to content

Commit 4f676c2

Browse files
author
Walter Erquinigo
committedJul 26, 2022
[trace][intel pt] Introduce wall clock time for each trace item
- Decouple TSCs from trace items - Turn TSCs into events just like CPUs. The new name is HW clock tick, which could be reused by other vendors. - Add a GetWallTime that returns the wall time that the trace plug-in can infer for each trace item. - For intel pt, we are doing the following interpolation: if an instruction takes less than 1 TSC, we use that duration, otherwise, we assume the instruction took 1 TSC. This helps us avoid having to handle context switches, changes to kernel, idle times, decoding errors, etc. We are just trying to show some approximation and not the real data. For the real data, TSCs are the way to go. Besides that, we are making sure that no two trace items will give the same interpolation value. Finally, we are using as time 0 the time at which tracing started. Sample output: ``` (lldb) r Process 750047 launched: '/home/wallace/a.out' (x86_64) Process 750047 stopped * thread #1, name = 'a.out', stop reason = breakpoint 1.1 frame #0: 0x0000000000402479 a.out`main at main.cpp:29:20 26 }; 27 28 int main() { -> 29 std::vector<int> vvv; 30 for (int i = 0; i < 100; i++) 31 vvv.push_back(i); 32 (lldb) process trace start -s 64kb -t --per-cpu (lldb) b 60 Breakpoint 2: where = a.out`main + 1689 at main.cpp:60:23, address = 0x0000000000402afe (lldb) c Process 750047 resuming Process 750047 stopped * thread #1, name = 'a.out', stop reason = breakpoint 2.1 frame #0: 0x0000000000402afe a.out`main at main.cpp:60:23 57 map<int, int> m; 58 m[3] = 4; 59 -> 60 map<string, string> m2; 61 m2["5"] = "6"; 62 63 std::vector<std::string> vs = {"2", "3"}; (lldb) thread trace dump instructions -t -f -e thread #1: tid = 750047 0: [379567.000 ns] (event) HW clock tick [48599428476224707] 1: [379569.000 ns] (event) CPU core changed [new CPU=2] 2: [390487.000 ns] (event) HW clock tick [48599428476246495] 3: [1602508.000 ns] (event) HW clock tick [48599428478664855] 4: [1662745.000 ns] (event) HW clock tick [48599428478785046] libc.so.6`malloc 5: 
[1662746.995 ns] 0x00007ffff7176660 endbr64 6: [1662748.991 ns] 0x00007ffff7176664 movq 0x32387d(%rip), %rax ; + 408 7: [1662750.986 ns] 0x00007ffff717666b pushq %r12 8: [1662752.981 ns] 0x00007ffff717666d pushq %rbp 9: [1662754.977 ns] 0x00007ffff717666e pushq %rbx 10: [1662756.972 ns] 0x00007ffff717666f movq (%rax), %rax 11: [1662758.967 ns] 0x00007ffff7176672 testq %rax, %rax 12: [1662760.963 ns] 0x00007ffff7176675 jne 0x9c7e0 ; <+384> 13: [1662762.958 ns] 0x00007ffff717667b leaq 0x17(%rdi), %rax 14: [1662764.953 ns] 0x00007ffff717667f cmpq $0x1f, %rax 15: [1662766.949 ns] 0x00007ffff7176683 ja 0x9c730 ; <+208> 16: [1662768.944 ns] 0x00007ffff7176730 andq $-0x10, %rax 17: [1662770.939 ns] 0x00007ffff7176734 cmpq $-0x41, %rax 18: [1662772.935 ns] 0x00007ffff7176738 seta %dl 19: [1662774.930 ns] 0x00007ffff717673b jmp 0x9c690 ; <+48> 20: [1662776.925 ns] 0x00007ffff7176690 cmpq %rdi, %rax 21: [1662778.921 ns] 0x00007ffff7176693 jb 0x9c7b0 ; <+336> 22: [1662780.916 ns] 0x00007ffff7176699 testb %dl, %dl 23: [1662782.911 ns] 0x00007ffff717669b jne 0x9c7b0 ; <+336> 24: [1662784.906 ns] 0x00007ffff71766a1 movq 0x3236c0(%rip), %r12 ; + 24 (lldb) thread trace dump instructions -t -f -e -J -c 4 [ { "id": 0, "timestamp_ns": "379567.000000", "event": "HW clock tick", "hwClock": 48599428476224707 }, { "id": 1, "timestamp_ns": "379569.000000", "event": "CPU core changed", "cpuId": 2 }, { "id": 2, "timestamp_ns": "390487.000000", "event": "HW clock tick", "hwClock": 48599428476246495 }, { "id": 3, "timestamp_ns": "1602508.000000", "event": "HW clock tick", "hwClock": 48599428478664855 }, { "id": 4, "timestamp_ns": "1662745.000000", "event": "HW clock tick", "hwClock": 48599428478785046 }, { "id": 5, "timestamp_ns": "1662746.995324", "loadAddress": "0x7ffff7176660", "module": "libc.so.6", "symbol": "malloc", "mnemonic": "endbr64" }, { "id": 6, "timestamp_ns": "1662748.990648", "loadAddress": "0x7ffff7176664", "module": "libc.so.6", "symbol": "malloc", "mnemonic": "movq" }, { 
"id": 7, "timestamp_ns": "1662750.985972", "loadAddress": "0x7ffff717666b", "module": "libc.so.6", "symbol": "malloc", "mnemonic": "pushq" }, { "id": 8, "timestamp_ns": "1662752.981296", "loadAddress": "0x7ffff717666d", "module": "libc.so.6", "symbol": "malloc", "mnemonic": "pushq" } ] ``` Differential Revision: https://reviews.llvm.org/D130054
1 parent dcd0946 commit 4f676c2

23 files changed

+606
-291
lines changed
 

‎lldb/include/lldb/Target/TraceCursor.h

+22-14
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ class TraceCursor {
215215
/// of this cursor.
216216
ExecutionContextRef &GetExecutionContextRef();
217217

218-
/// Instruction, event or error information
218+
/// Trace item information (instructions, errors and events)
219219
/// \{
220220

221221
/// \return
@@ -255,27 +255,35 @@ class TraceCursor {
255255
/// The load address of the instruction the cursor is pointing at.
256256
virtual lldb::addr_t GetLoadAddress() const = 0;
257257

258-
/// Get the hardware counter of a given type associated with the current
259-
/// instruction. Each architecture might support different counters. It might
260-
/// happen that only some instructions of an entire trace have a given counter
261-
/// associated with them.
262-
///
263-
/// \param[in] counter_type
264-
/// The counter type.
265-
/// \return
266-
/// The value of the counter or \b llvm::None if not available.
267-
virtual llvm::Optional<uint64_t>
268-
GetCounter(lldb::TraceCounter counter_type) const = 0;
269-
270258
/// Get the CPU associated with the current trace item.
271259
///
272260
/// This call might not be O(1), so it's suggested to invoke this method
273-
/// whenever a cpu change event is fired.
261+
/// whenever an eTraceEventCPUChanged event is fired.
274262
///
275263
/// \return
276264
/// The requested CPU id, or \a llvm::None if this information is
277265
/// not available for the current item.
278266
virtual llvm::Optional<lldb::cpu_id_t> GetCPU() const = 0;
267+
268+
/// Get the last hardware clock value that was emitted before the current
269+
/// trace item.
270+
///
271+
/// This call might not be O(1), so it's suggested to invoke this method
272+
/// whenever an eTraceEventHWClockTick event is fired.
273+
///
274+
/// \return
275+
/// The requested HW clock value, or \a llvm::None if this information is
276+
/// not available for the current item.
277+
virtual llvm::Optional<uint64_t> GetHWClock() const = 0;
278+
279+
/// Get the approximate wall clock time in nanoseconds at which the current
280+
/// trace item was executed. Each trace plug-in has a different definition for
281+
/// what time 0 means.
282+
///
283+
/// \return
284+
/// The approximate wall clock time for the trace item, or \a llvm::None
285+
/// if not available.
286+
virtual llvm::Optional<double> GetWallClockTime() const = 0;
279287
/// \}
280288

281289
protected:

‎lldb/include/lldb/Target/TraceDumper.h

+4-3
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,9 @@ struct TraceDumperOptions {
2929
bool json = false;
3030
/// When dumping in JSON format, pretty print the output.
3131
bool pretty_print_json = false;
32-
/// For each instruction, print the corresponding timestamp counter if
32+
/// For each trace item, print the corresponding timestamp in nanoseconds if
3333
/// available.
34-
bool show_tsc = false;
34+
bool show_timestamps = false;
3535
/// Dump the events that happened between instructions.
3636
bool show_events = false;
3737
/// For each instruction, print the instruction kind.
@@ -61,7 +61,8 @@ class TraceDumper {
6161
struct TraceItem {
6262
lldb::user_id_t id;
6363
lldb::addr_t load_address;
64-
llvm::Optional<uint64_t> tsc;
64+
llvm::Optional<double> timestamp;
65+
llvm::Optional<uint64_t> hw_clock;
6566
llvm::Optional<llvm::StringRef> error;
6667
llvm::Optional<lldb::TraceEvent> event;
6768
llvm::Optional<SymbolInfo> symbol_info;

‎lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,8 @@ struct LinuxPerfZeroTscConversion {
9191
/// nanoseconds) is defined by the kernel at boot time and has no particularly
9292
/// useful meaning. On the other hand, this value is constant for an entire
9393
/// trace session.
94-
// See 'time_zero' section of
95-
// https://man7.org/linux/man-pages/man2/perf_event_open.2.html
94+
/// See 'time_zero' section of
95+
/// https://man7.org/linux/man-pages/man2/perf_event_open.2.html
9696
///
9797
/// \param[in] tsc
9898
/// The TSC value to be converted.

‎lldb/include/lldb/lldb-enumerations.h

+2-6
Original file line numberDiff line numberDiff line change
@@ -1159,12 +1159,6 @@ enum SaveCoreStyle {
11591159
eSaveCoreStackOnly = 3,
11601160
};
11611161

1162-
// Type of counter values associated with instructions in a trace.
1163-
enum TraceCounter {
1164-
// Timestamp counter, like the one offered by Intel CPUs (TSC).
1165-
eTraceCounterTSC = 0,
1166-
};
1167-
11681162
/// Events that might happen during a trace session.
11691163
enum TraceEvent {
11701164
/// Tracing was disabled for some time due to a software trigger
@@ -1174,6 +1168,8 @@ enum TraceEvent {
11741168
/// Event due to CPU change for a thread. This event is also fired when
11751169
/// suddenly it's not possible to identify the cpu of a given thread.
11761170
eTraceEventCPUChanged,
1171+
/// Event due to a CPU HW clock tick
1172+
eTraceEventHWClockTick,
11771173
};
11781174

11791175
// Enum used to identify which kind of item a \a TraceCursor is pointing at

‎lldb/source/Commands/CommandObjectThread.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -2162,7 +2162,7 @@ class CommandObjectTraceDumpInstructions : public CommandObjectParsed {
21622162
break;
21632163
}
21642164
case 't': {
2165-
m_dumper_options.show_tsc = true;
2165+
m_dumper_options.show_timestamps = true;
21662166
break;
21672167
}
21682168
case 'e': {

‎lldb/source/Commands/Options.td

+6-4
Original file line numberDiff line numberDiff line change
@@ -1146,15 +1146,17 @@ let Command = "thread trace dump instructions" in {
11461146
Desc<"Dump in simple JSON format.">;
11471147
def thread_trace_dump_instructions_pretty_print: Option<"pretty-json", "J">,
11481148
Group<1>,
1149-
Desc<"Dump in JSON format but pretty printing the output for easier readability.">;
1149+
Desc<"Dump in JSON format but pretty printing the output for easier "
1150+
"readability.">;
11501151
def thread_trace_dump_instructions_show_kind : Option<"kind", "k">, Group<1>,
11511152
Desc<"Show instruction control flow kind. Refer to the enum "
11521153
"`InstructionControlFlowKind` for a list of control flow kind. "
11531154
"As an important note, far jumps, far calls and far returns often indicate "
11541155
"calls to and from kernel.">;
1155-
def thread_trace_dump_instructions_show_tsc : Option<"tsc", "t">, Group<1>,
1156-
Desc<"For each instruction, print the corresponding timestamp counter if "
1157-
"available.">;
1156+
def thread_trace_dump_instructions_show_timestamps: Option<"time", "t">,
1157+
Group<1>,
1158+
Desc<"For each trace item, print the corresponding wall clock timestamp "
1159+
"if available.">;
11581160
def thread_trace_dump_instructions_show_events : Option<"events", "e">,
11591161
Group<1>,
11601162
Desc<"Dump the events that happened during the execution of the target.">;

‎lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp

+111-87
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,65 @@ void IntelPTError::log(llvm::raw_ostream &OS) const {
4444
OS << formatv(": {0:x+16}", m_address);
4545
}
4646

47-
int64_t DecodedThread::GetItemsCount() const {
48-
return static_cast<int64_t>(m_item_kinds.size());
47+
bool DecodedThread::TSCRange::InRange(uint64_t item_index) const {
48+
return item_index >= first_item_index &&
49+
item_index < first_item_index + items_count;
50+
}
51+
52+
bool DecodedThread::NanosecondsRange::InRange(uint64_t item_index) const {
53+
return item_index >= first_item_index &&
54+
item_index < first_item_index + items_count;
55+
}
56+
57+
double DecodedThread::NanosecondsRange::GetInterpolatedTime(
58+
uint64_t item_index, uint64_t begin_of_time_nanos,
59+
const LinuxPerfZeroTscConversion &tsc_conversion) const {
60+
uint64_t items_since_last_tsc = item_index - first_item_index;
61+
62+
auto interpolate = [&](uint64_t next_range_start_ns) {
63+
if (next_range_start_ns == nanos) {
64+
// If the resolution of the conversion formula is bad enough to consider
65+
// these two timestamps as equal, then we just increase the next one by 1
66+
// for correction
67+
next_range_start_ns++;
68+
}
69+
long double item_duration =
70+
static_cast<long double>(items_count) / (next_range_start_ns - nanos);
71+
return (nanos - begin_of_time_nanos) + items_since_last_tsc * item_duration;
72+
};
73+
74+
if (!next_range) {
75+
// If this is the last TSC range, we have to extrapolate. In this case,
76+
// we assume that each instruction took one TSC, which is what an
77+
// instruction would take if no parallelism is achieved and the frequency
78+
// multiplier is 1.
79+
return interpolate(tsc_conversion.ToNanos(tsc + items_count));
80+
}
81+
if (items_count < (next_range->tsc - tsc)) {
82+
// If the number of items in this range is less than the total TSC duration
83+
// of this range, i.e. each instruction taking longer than 1 TSC, then we
84+
// can assume that something else happened between these TSCs (e.g. a
85+
// context switch, change to kernel, decoding errors, etc). In this case, we
86+
// also assume that each instruction took 1 TSC. A proper way to improve
87+
// this would be to analyze the next events in the trace looking for context
88+
// switches or trace disablement events, but for now, as we only want an
89+
// approximation, we keep it simple. We are also guaranteed that the time in
90+
// nanos of the next range is different to the current one, just because of
91+
// the definition of a NanosecondsRange.
92+
return interpolate(
93+
std::min(tsc_conversion.ToNanos(tsc + items_count), next_range->nanos));
94+
}
95+
96+
// In this case, each item took less than 1 TSC, so some parallelism was
97+
// achieved, which is an indication that we didn't suffer from any kind of
98+
// interruption.
99+
return interpolate(next_range->nanos);
49100
}
50101

51-
lldb::addr_t DecodedThread::GetInstructionLoadAddress(size_t item_index) const {
102+
uint64_t DecodedThread::GetItemsCount() const { return m_item_kinds.size(); }
103+
104+
lldb::addr_t
105+
DecodedThread::GetInstructionLoadAddress(uint64_t item_index) const {
52106
return m_item_data[item_index].load_address;
53107
}
54108

@@ -58,33 +112,69 @@ DecodedThread::TraceItemStorage &
58112
DecodedThread::CreateNewTraceItem(lldb::TraceItemKind kind) {
59113
m_item_kinds.push_back(kind);
60114
m_item_data.emplace_back();
115+
if (m_last_tsc)
116+
(*m_last_tsc)->second.items_count++;
117+
if (m_last_nanoseconds)
118+
(*m_last_nanoseconds)->second.items_count++;
61119
return m_item_data.back();
62120
}
63121

64-
void DecodedThread::NotifyTsc(uint64_t tsc) {
65-
if (!m_last_tsc || *m_last_tsc != tsc) {
66-
m_timestamps.emplace(m_item_kinds.size(), tsc);
67-
m_last_tsc = tsc;
122+
void DecodedThread::NotifyTsc(TSC tsc) {
123+
if (m_last_tsc && (*m_last_tsc)->second.tsc == tsc)
124+
return;
125+
126+
m_last_tsc =
127+
m_tscs.emplace(GetItemsCount(), TSCRange{tsc, 0, GetItemsCount()}).first;
128+
129+
if (m_tsc_conversion) {
130+
uint64_t nanos = m_tsc_conversion->ToNanos(tsc);
131+
if (!m_last_nanoseconds || (*m_last_nanoseconds)->second.nanos != nanos) {
132+
m_last_nanoseconds =
133+
m_nanoseconds
134+
.emplace(GetItemsCount(), NanosecondsRange{nanos, tsc, nullptr, 0,
135+
GetItemsCount()})
136+
.first;
137+
if (*m_last_nanoseconds != m_nanoseconds.begin()) {
138+
auto prev_range = prev(*m_last_nanoseconds);
139+
prev_range->second.next_range = &(*m_last_nanoseconds)->second;
140+
}
141+
}
68142
}
143+
AppendEvent(lldb::eTraceEventHWClockTick);
69144
}
70145

71146
void DecodedThread::NotifyCPU(lldb::cpu_id_t cpu_id) {
72147
if (!m_last_cpu || *m_last_cpu != cpu_id) {
73-
m_cpus.emplace(m_item_kinds.size(), cpu_id);
148+
m_cpus.emplace(GetItemsCount(), cpu_id);
74149
m_last_cpu = cpu_id;
75150
AppendEvent(lldb::eTraceEventCPUChanged);
76151
}
77152
}
78153

79154
Optional<lldb::cpu_id_t>
80-
DecodedThread::GetCPUByIndex(uint64_t insn_index) const {
81-
// Could possibly optimize the search
82-
auto it = m_cpus.upper_bound(insn_index);
155+
DecodedThread::GetCPUByIndex(uint64_t item_index) const {
156+
auto it = m_cpus.upper_bound(item_index);
83157
if (it == m_cpus.begin())
84158
return None;
85159
return prev(it)->second;
86160
}
87161

162+
Optional<DecodedThread::TSCRange>
163+
DecodedThread::GetTSCRangeByIndex(uint64_t item_index) const {
164+
auto next_it = m_tscs.upper_bound(item_index);
165+
if (next_it == m_tscs.begin())
166+
return None;
167+
return prev(next_it)->second;
168+
}
169+
170+
Optional<DecodedThread::NanosecondsRange>
171+
DecodedThread::GetNanosecondsRangeByIndex(uint64_t item_index) {
172+
auto next_it = m_nanoseconds.upper_bound(item_index);
173+
if (next_it == m_nanoseconds.begin())
174+
return None;
175+
return prev(next_it)->second;
176+
}
177+
88178
void DecodedThread::AppendEvent(lldb::TraceEvent event) {
89179
CreateNewTraceItem(lldb::eTraceItemKindEvent).event = event;
90180
m_events_stats.RecordEvent(event);
@@ -134,90 +224,24 @@ void DecodedThread::EventsStats::RecordEvent(lldb::TraceEvent event) {
134224
total_count++;
135225
}
136226

137-
Optional<DecodedThread::TscRange> DecodedThread::CalculateTscRange(
138-
size_t insn_index,
139-
const Optional<DecodedThread::TscRange> &hint_range) const {
140-
// We first try to check the given hint range in case we are traversing the
141-
// trace in short jumps. If that fails, then we do the more expensive
142-
// arbitrary lookup.
143-
if (hint_range) {
144-
Optional<TscRange> candidate_range;
145-
if (insn_index < hint_range->GetStartInstructionIndex())
146-
candidate_range = hint_range->Prev();
147-
else if (insn_index > hint_range->GetEndInstructionIndex())
148-
candidate_range = hint_range->Next();
149-
else
150-
candidate_range = hint_range;
151-
152-
if (candidate_range && candidate_range->InRange(insn_index))
153-
return candidate_range;
154-
}
155-
// Now we do a more expensive lookup
156-
auto it = m_timestamps.upper_bound(insn_index);
157-
if (it == m_timestamps.begin())
158-
return None;
159-
160-
return TscRange(--it, *this);
161-
}
162-
163-
lldb::TraceItemKind DecodedThread::GetItemKindByIndex(size_t item_index) const {
227+
lldb::TraceItemKind
228+
DecodedThread::GetItemKindByIndex(uint64_t item_index) const {
164229
return static_cast<lldb::TraceItemKind>(m_item_kinds[item_index]);
165230
}
166231

167-
const char *DecodedThread::GetErrorByIndex(size_t item_index) const {
232+
const char *DecodedThread::GetErrorByIndex(uint64_t item_index) const {
168233
return m_item_data[item_index].error;
169234
}
170235

171-
DecodedThread::DecodedThread(ThreadSP thread_sp) : m_thread_sp(thread_sp) {}
172-
173-
lldb::TraceCursorUP DecodedThread::CreateNewCursor() {
174-
return std::make_unique<TraceCursorIntelPT>(m_thread_sp, shared_from_this());
175-
}
236+
DecodedThread::DecodedThread(
237+
ThreadSP thread_sp,
238+
const llvm::Optional<LinuxPerfZeroTscConversion> &tsc_conversion)
239+
: m_thread_sp(thread_sp), m_tsc_conversion(tsc_conversion) {}
176240

177241
size_t DecodedThread::CalculateApproximateMemoryUsage() const {
178242
return sizeof(TraceItemStorage) * m_item_data.size() +
179243
sizeof(uint8_t) * m_item_kinds.size() +
180-
(sizeof(size_t) + sizeof(uint64_t)) * m_timestamps.size() +
181-
(sizeof(size_t) + sizeof(lldb::cpu_id_t)) * m_cpus.size();
182-
}
183-
184-
DecodedThread::TscRange::TscRange(std::map<size_t, uint64_t>::const_iterator it,
185-
const DecodedThread &decoded_thread)
186-
: m_it(it), m_decoded_thread(&decoded_thread) {
187-
auto next_it = m_it;
188-
++next_it;
189-
m_end_index = (next_it == m_decoded_thread->m_timestamps.end())
190-
? std::numeric_limits<uint64_t>::max()
191-
: next_it->first - 1;
192-
}
193-
194-
size_t DecodedThread::TscRange::GetTsc() const { return m_it->second; }
195-
196-
size_t DecodedThread::TscRange::GetStartInstructionIndex() const {
197-
return m_it->first;
198-
}
199-
200-
size_t DecodedThread::TscRange::GetEndInstructionIndex() const {
201-
return m_end_index;
202-
}
203-
204-
bool DecodedThread::TscRange::InRange(size_t insn_index) const {
205-
return GetStartInstructionIndex() <= insn_index &&
206-
insn_index <= GetEndInstructionIndex();
207-
}
208-
209-
Optional<DecodedThread::TscRange> DecodedThread::TscRange::Next() const {
210-
auto next_it = m_it;
211-
++next_it;
212-
if (next_it == m_decoded_thread->m_timestamps.end())
213-
return None;
214-
return TscRange(next_it, *m_decoded_thread);
215-
}
216-
217-
Optional<DecodedThread::TscRange> DecodedThread::TscRange::Prev() const {
218-
if (m_it == m_decoded_thread->m_timestamps.begin())
219-
return None;
220-
auto prev_it = m_it;
221-
--prev_it;
222-
return TscRange(prev_it, *m_decoded_thread);
244+
(sizeof(uint64_t) + sizeof(TSC)) * m_tscs.size() +
245+
(sizeof(uint64_t) + sizeof(uint64_t)) * m_nanoseconds.size() +
246+
(sizeof(uint64_t) + sizeof(lldb::cpu_id_t)) * m_cpus.size();
223247
}

0 commit comments

Comments
 (0)
Please sign in to comment.