@@ -44,11 +44,65 @@ void IntelPTError::log(llvm::raw_ostream &OS) const {
  OS << formatv(" : {0:x+16}", m_address);
}

- int64_t DecodedThread::GetItemsCount() const {
-   return static_cast<int64_t>(m_item_kinds.size());
+ bool DecodedThread::TSCRange::InRange(uint64_t item_index) const {
+   return item_index >= first_item_index &&
+          item_index < first_item_index + items_count;
+ }
+
+ bool DecodedThread::NanosecondsRange::InRange(uint64_t item_index) const {
+   return item_index >= first_item_index &&
+          item_index < first_item_index + items_count;
+ }
+
+ double DecodedThread::NanosecondsRange::GetInterpolatedTime(
+     uint64_t item_index, uint64_t begin_of_time_nanos,
+     const LinuxPerfZeroTscConversion &tsc_conversion) const {
+   uint64_t items_since_last_tsc = item_index - first_item_index;
+
+   auto interpolate = [&](uint64_t next_range_start_ns) {
+     if (next_range_start_ns == nanos) {
+       // If the resolution of the conversion formula is bad enough to consider
+       // these two timestamps as equal, then we just increase the next one by 1
+       // for correction
+       next_range_start_ns++;
+     }
+     long double item_duration =
+         static_cast<long double>(next_range_start_ns - nanos) / items_count;
+     return (nanos - begin_of_time_nanos) + items_since_last_tsc * item_duration;
+   };
+
+   if (!next_range) {
+     // If this is the last TSC range, we have to extrapolate. In this case, we
+     // assume that each instruction took one TSC, which is what an instruction
+     // would take if no parallelism is achieved and the frequency multiplier
+     // is 1.
+     return interpolate(tsc_conversion.ToNanos(tsc + items_count));
+   }
+   if (items_count < (next_range->tsc - tsc)) {
+     // If the number of items in this range is less than the total TSC
+     // duration of this range, i.e. each instruction took longer than 1 TSC,
+     // then we can assume that something else happened between these TSCs
+     // (e.g. a context switch, a jump into the kernel, decoding errors, etc.).
+     // In this case, we also assume that each instruction took 1 TSC. A proper
+     // way to improve this would be to analyze the next events in the trace
+     // looking for context switches or trace disablement events, but for now,
+     // as we only want an approximation, we keep it simple. We are also
+     // guaranteed that the time in nanos of the next range is different from
+     // the current one, simply by the definition of a NanosecondsRange.
+     return interpolate(
+         std::min(tsc_conversion.ToNanos(tsc + items_count), next_range->nanos));
+   }
+
+   // In this case, each item took less than 1 TSC, so some parallelism was
+   // achieved, which is an indication that we didn't suffer any kind of
+   // interruption.
+   return interpolate(next_range->nanos);
}

- lldb::addr_t DecodedThread::GetInstructionLoadAddress(size_t item_index) const {
+ uint64_t DecodedThread::GetItemsCount() const { return m_item_kinds.size(); }
+
+ lldb::addr_t
+ DecodedThread::GetInstructionLoadAddress(uint64_t item_index) const {
  return m_item_data[item_index].load_address;
}
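To see what GetInterpolatedTime computes, here is a minimal standalone sketch of the same interpolation outside LLDB. The numbers are invented for illustration, and the hard-coded nanosecond anchors stand in for the real LinuxPerfZeroTscConversion:

// Standalone sketch of the interpolation above, with made-up values; not the
// LLDB implementation.
#include <cstdint>
#include <cstdio>

int main() {
  // A range of 100 trace items anchored at 1000 ns; the next range starts at
  // 2000 ns, so the 100 items are assumed to be spread evenly over 1000 ns.
  uint64_t nanos = 1000;
  uint64_t next_range_start_ns = 2000;
  uint64_t items_count = 100;
  uint64_t begin_of_time_nanos = 1000;

  // Each item is assumed to take an equal slice of the range's duration.
  long double item_duration =
      static_cast<long double>(next_range_start_ns - nanos) / items_count;

  for (uint64_t items_since_last_tsc : {0, 50, 99}) {
    long double time = (nanos - begin_of_time_nanos) +
                       items_since_last_tsc * item_duration;
    printf("item +%llu -> %.1Lf ns since trace start\n",
           (unsigned long long)items_since_last_tsc, time);
  }
  // Prints 0.0, 500.0 and 990.0: indices map linearly onto the wall-clock
  // span between the two range anchors.
  return 0;
}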
@@ -58,33 +112,69 @@ DecodedThread::TraceItemStorage &
DecodedThread::CreateNewTraceItem(lldb::TraceItemKind kind) {
  m_item_kinds.push_back(kind);
  m_item_data.emplace_back();
+ if (m_last_tsc)
+   (*m_last_tsc)->second.items_count++;
+ if (m_last_nanoseconds)
+   (*m_last_nanoseconds)->second.items_count++;
  return m_item_data.back();
}

- void DecodedThread::NotifyTsc(uint64_t tsc) {
-   if (!m_last_tsc || *m_last_tsc != tsc) {
-     m_timestamps.emplace(m_item_kinds.size(), tsc);
-     m_last_tsc = tsc;
+ void DecodedThread::NotifyTsc(TSC tsc) {
+   if (m_last_tsc && (*m_last_tsc)->second.tsc == tsc)
+     return;
+
+   m_last_tsc =
+       m_tscs.emplace(GetItemsCount(), TSCRange{tsc, 0, GetItemsCount()}).first;
+
+   if (m_tsc_conversion) {
+     uint64_t nanos = m_tsc_conversion->ToNanos(tsc);
+     if (!m_last_nanoseconds || (*m_last_nanoseconds)->second.nanos != nanos) {
+       m_last_nanoseconds =
+           m_nanoseconds
+               .emplace(GetItemsCount(), NanosecondsRange{nanos, tsc, nullptr,
+                                                          0, GetItemsCount()})
+               .first;
+       if (*m_last_nanoseconds != m_nanoseconds.begin()) {
+         auto prev_range = prev(*m_last_nanoseconds);
+         prev_range->second.next_range = &(*m_last_nanoseconds)->second;
+       }
+     }
  }
+ AppendEvent(lldb::eTraceEventHWClockTick);
}

void DecodedThread::NotifyCPU(lldb::cpu_id_t cpu_id) {
  if (!m_last_cpu || *m_last_cpu != cpu_id) {
-     m_cpus.emplace(m_item_kinds.size(), cpu_id);
+     m_cpus.emplace(GetItemsCount(), cpu_id);
    m_last_cpu = cpu_id;
    AppendEvent(lldb::eTraceEventCPUChanged);
  }
}

Optional<lldb::cpu_id_t>
- DecodedThread::GetCPUByIndex(uint64_t insn_index) const {
-   // Could possibly optimize the search
-   auto it = m_cpus.upper_bound(insn_index);
+ DecodedThread::GetCPUByIndex(uint64_t item_index) const {
+   auto it = m_cpus.upper_bound(item_index);
  if (it == m_cpus.begin())
    return None;
  return prev(it)->second;
}

+ Optional<DecodedThread::TSCRange>
+ DecodedThread::GetTSCRangeByIndex(uint64_t item_index) const {
+   auto next_it = m_tscs.upper_bound(item_index);
+   if (next_it == m_tscs.begin())
+     return None;
+   return prev(next_it)->second;
+ }
+
+ Optional<DecodedThread::NanosecondsRange>
+ DecodedThread::GetNanosecondsRangeByIndex(uint64_t item_index) {
+   auto next_it = m_nanoseconds.upper_bound(item_index);
+   if (next_it == m_nanoseconds.begin())
+     return None;
+   return prev(next_it)->second;
+ }
+
void DecodedThread::AppendEvent(lldb::TraceEvent event) {
  CreateNewTraceItem(lldb::eTraceItemKindEvent).event = event;
  m_events_stats.RecordEvent(event);
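GetCPUByIndex, GetTSCRangeByIndex and GetNanosecondsRangeByIndex all rely on the same trick: each map is keyed by the first item index its entry covers, so the entry governing a given index is the one immediately before upper_bound(index). A small self-contained sketch of that pattern, with illustrative values rather than LLDB code:

// Sketch of the upper_bound/prev lookup used by the ByIndex getters above.
#include <cassert>
#include <cstdint>
#include <iterator>
#include <map>

int main() {
  // Key: first item index covered by the entry; value: CPU id from then on.
  std::map<uint64_t, uint32_t> cpus = {{0, 2}, {100, 5}, {250, 1}};

  auto get_cpu = [&](uint64_t item_index) -> int64_t {
    // upper_bound finds the first entry starting strictly after item_index;
    // the governing entry, if any, is the one right before it.
    auto it = cpus.upper_bound(item_index);
    if (it == cpus.begin())
      return -1; // no entry covers this index
    return std::prev(it)->second;
  };

  assert(get_cpu(99) == 2);  // still covered by the entry starting at 0
  assert(get_cpu(100) == 5); // first index of the second entry
  assert(get_cpu(999) == 1); // the last entry extends to the end of the trace
  return 0;
}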
@@ -134,90 +224,24 @@ void DecodedThread::EventsStats::RecordEvent(lldb::TraceEvent event) {
  total_count++;
}

- Optional<DecodedThread::TscRange> DecodedThread::CalculateTscRange(
-     size_t insn_index,
-     const Optional<DecodedThread::TscRange> &hint_range) const {
-   // We first try to check the given hint range in case we are traversing the
-   // trace in short jumps. If that fails, then we do the more expensive
-   // arbitrary lookup.
-   if (hint_range) {
-     Optional<TscRange> candidate_range;
-     if (insn_index < hint_range->GetStartInstructionIndex())
-       candidate_range = hint_range->Prev();
-     else if (insn_index > hint_range->GetEndInstructionIndex())
-       candidate_range = hint_range->Next();
-     else
-       candidate_range = hint_range;
-
-     if (candidate_range && candidate_range->InRange(insn_index))
-       return candidate_range;
-   }
-   // Now we do a more expensive lookup
-   auto it = m_timestamps.upper_bound(insn_index);
-   if (it == m_timestamps.begin())
-     return None;
-
-   return TscRange(--it, *this);
- }
-
- lldb::TraceItemKind DecodedThread::GetItemKindByIndex(size_t item_index) const {
+ lldb::TraceItemKind
+ DecodedThread::GetItemKindByIndex(uint64_t item_index) const {
  return static_cast<lldb::TraceItemKind>(m_item_kinds[item_index]);
}

- const char *DecodedThread::GetErrorByIndex(size_t item_index) const {
+ const char *DecodedThread::GetErrorByIndex(uint64_t item_index) const {
  return m_item_data[item_index].error;
}

- DecodedThread::DecodedThread(ThreadSP thread_sp) : m_thread_sp(thread_sp) {}
-
- lldb::TraceCursorUP DecodedThread::CreateNewCursor() {
-   return std::make_unique<TraceCursorIntelPT>(m_thread_sp, shared_from_this());
- }
+ DecodedThread::DecodedThread(
+     ThreadSP thread_sp,
+     const llvm::Optional<LinuxPerfZeroTscConversion> &tsc_conversion)
+     : m_thread_sp(thread_sp), m_tsc_conversion(tsc_conversion) {}

size_t DecodedThread::CalculateApproximateMemoryUsage() const {
  return sizeof(TraceItemStorage) * m_item_data.size() +
         sizeof(uint8_t) * m_item_kinds.size() +
-          (sizeof(size_t) + sizeof(uint64_t)) * m_timestamps.size() +
-          (sizeof(size_t) + sizeof(lldb::cpu_id_t)) * m_cpus.size();
- }
-
- DecodedThread::TscRange::TscRange(std::map<size_t, uint64_t>::const_iterator it,
-                                   const DecodedThread &decoded_thread)
-     : m_it(it), m_decoded_thread(&decoded_thread) {
-   auto next_it = m_it;
-   ++next_it;
-   m_end_index = (next_it == m_decoded_thread->m_timestamps.end())
-                     ? std::numeric_limits<uint64_t>::max()
-                     : next_it->first - 1;
- }
-
- size_t DecodedThread::TscRange::GetTsc() const { return m_it->second; }
-
- size_t DecodedThread::TscRange::GetStartInstructionIndex() const {
-   return m_it->first;
- }
-
- size_t DecodedThread::TscRange::GetEndInstructionIndex() const {
-   return m_end_index;
- }
-
- bool DecodedThread::TscRange::InRange(size_t insn_index) const {
-   return GetStartInstructionIndex() <= insn_index &&
-          insn_index <= GetEndInstructionIndex();
- }
-
- Optional<DecodedThread::TscRange> DecodedThread::TscRange::Next() const {
-   auto next_it = m_it;
-   ++next_it;
-   if (next_it == m_decoded_thread->m_timestamps.end())
-     return None;
-   return TscRange(next_it, *m_decoded_thread);
- }
-
- Optional<DecodedThread::TscRange> DecodedThread::TscRange::Prev() const {
-   if (m_it == m_decoded_thread->m_timestamps.begin())
-     return None;
-   auto prev_it = m_it;
-   --prev_it;
-   return TscRange(prev_it, *m_decoded_thread);
+          (sizeof(uint64_t) + sizeof(TSC)) * m_tscs.size() +
+          (sizeof(uint64_t) + sizeof(uint64_t)) * m_nanoseconds.size() +
+          (sizeof(uint64_t) + sizeof(lldb::cpu_id_t)) * m_cpus.size();
}
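Taken together, the new APIs let a caller (such as a trace cursor) resolve an approximate wall-clock time for any item index. A hypothetical caller-side sketch, not part of this commit:

// Hypothetical helper showing how the pieces added above fit together.
llvm::Optional<double>
GetApproximateWallClockTime(DecodedThread &decoded_thread, uint64_t item_index,
                            uint64_t begin_of_time_nanos,
                            const LinuxPerfZeroTscConversion &tsc_conversion) {
  llvm::Optional<DecodedThread::NanosecondsRange> range =
      decoded_thread.GetNanosecondsRangeByIndex(item_index);
  if (!range)
    return llvm::None; // no timing data was decoded around this item
  // Within a range, items are assumed to be evenly spaced between this
  // range's wall-clock anchor and the next one (see GetInterpolatedTime).
  return range->GetInterpolatedTime(item_index, begin_of_time_nanos,
                                    tsc_conversion);
}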