Skip to content

Commit 5d05b82

Browse files
committed
Print CPU usage for rayon threads in pass timings
1 parent 3ecd19f commit 5d05b82

File tree

5 files changed

+131
-8
lines changed

5 files changed

+131
-8
lines changed

src/Cargo.lock

Lines changed: 24 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/librustc/Cargo.toml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,16 @@ syntax = { path = "../libsyntax" }
2828
syntax_pos = { path = "../libsyntax_pos" }
2929
lazy_static = "1.0.0"
3030

31+
[target.'cfg(target_arch = "x86")'.dependencies]
32+
x86 = { git = "https://github.com/gz/rust-x86.git" }
33+
34+
[target.'cfg(target_arch = "x86_64")'.dependencies]
35+
x86 = { git = "https://github.com/gz/rust-x86.git" }
36+
37+
[target.'cfg(windows)'.dependencies]
38+
kernel32-sys = "0.2.2"
39+
winapi = "0.2.8"
40+
3141
# Note that these dependencies are a lie, they're just here to get linkage to
3242
# work.
3343
#

src/librustc/lib.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,11 @@ extern crate getopts;
7878
extern crate graphviz;
7979
#[cfg(windows)]
8080
extern crate libc;
81+
#[cfg(windows)]
82+
extern crate kernel32;
83+
#[cfg(windows)]
84+
extern crate winapi;
85+
extern crate x86;
8186
extern crate owning_ref;
8287
extern crate rustc_back;
8388
#[macro_use] extern crate rustc_data_structures;

src/librustc/ty/maps/plumbing.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
102102
{
103103
if let Some((i, _)) = self.query().stack.iter().enumerate().rev()
104104
.find(|&(_, &(_, ref q))| q == query) {
105+
println!(" cycle_check failed for query {:?}, span: {:?}", query, span);
105106
Err(CycleError {
106107
span,
107108
cycle: self.query().stack[i..].iter().cloned().collect(),

src/librustc/util/common.rs

Lines changed: 91 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -130,18 +130,103 @@ pub fn time<T, F>(do_it: bool, what: &str, f: F) -> T where
130130
if cfg!(debug_assertions) {
131131
profq_msg(ProfileQueriesMsg::TimeBegin(what.to_string()))
132132
};
133+
134+
#[cfg(not(all(windows, parallel_queries, any(target_arch = "x86", target_arch = "x86_64"))))]
135+
let rv = time_impl(what, f);
136+
#[cfg(all(windows, parallel_queries, any(target_arch = "x86", target_arch = "x86_64")))]
137+
let rv = time_threads_impl(what, f);
138+
139+
TIME_DEPTH.with(|slot| slot.set(old));
140+
141+
rv
142+
}
143+
144+
fn time_impl<T, F>(what: &str, f: F) -> T where
145+
F: FnOnce() -> T,
146+
{
133147
let start = Instant::now();
134148
let rv = f();
135149
let dur = start.elapsed();
136150
if cfg!(debug_assertions) {
137151
profq_msg(ProfileQueriesMsg::TimeEnd)
138152
};
153+
print_time_passes_entry_internal(what, duration_to_secs_str(dur));
154+
rv
155+
}
139156

140-
print_time_passes_entry_internal(what, dur);
157+
#[cfg(all(windows, parallel_queries, any(target_arch = "x86", target_arch = "x86_64")))]
158+
fn time_threads_impl<T, F>(what: &str, f: F) -> T where
159+
F: FnOnce() -> T,
160+
{
161+
use rayon_core::registry;
162+
use std::iter;
163+
use x86;
164+
use winapi;
165+
use kernel32;
166+
167+
let registry = registry::get_current_registry();
168+
if let Some(registry) = registry {
169+
let freq = unsafe {
170+
let mut freq = 0;
171+
assert!(kernel32::QueryPerformanceFrequency(&mut freq) == winapi::TRUE);
172+
freq as u64 * 1000
173+
};
174+
175+
let threads: Vec<_> = {
176+
let threads = registry.handles.lock();
177+
let current = unsafe {
178+
iter::once(kernel32::GetCurrentThread())
179+
};
180+
current.chain(threads.iter().map(|t| t.0)).collect()
181+
};
182+
let mut begin: Vec<u64> = iter::repeat(0).take(threads.len()).collect();
183+
let mut end: Vec<u64> = iter::repeat(0).take(threads.len()).collect();
184+
for (i, &handle) in threads.iter().enumerate() {
185+
unsafe {
186+
assert!(kernel32::QueryThreadCycleTime(handle, &mut begin[i]) == winapi::TRUE);
187+
}
188+
}
189+
let time_start = unsafe { x86::shared::time::rdtsc() };
190+
let result = f();
191+
let time_end = unsafe { x86::shared::time::rdtsc() };
192+
for (i, &handle) in threads.iter().enumerate() {
193+
unsafe {
194+
assert!(kernel32::QueryThreadCycleTime(handle, &mut end[i]) == winapi::TRUE);
195+
}
196+
}
197+
if cfg!(debug_assertions) {
198+
profq_msg(ProfileQueriesMsg::TimeEnd)
199+
};
200+
let time = time_end - time_start;
201+
let time_secs = time as f64 / freq as f64;
202+
203+
let thread_times: Vec<u64> = end.iter().zip(begin.iter()).map(|(e, b)| *e - *b).collect();
204+
205+
let total_thread_time: u64 = thread_times.iter().cloned().sum();
206+
let core_usage = total_thread_time as f64 / time as f64;
207+
208+
let mut data = format!("{:.3} - cores {:.2}x - cpu {:.2}% - threads (",
209+
time_secs,
210+
core_usage,
211+
core_usage * 100.0 / (thread_times.len() - 1) as f64);
212+
213+
for (i, thread_time) in thread_times.into_iter().enumerate() {
214+
data.push_str(&format!("{:.2}x", thread_time as f64 / time as f64));
215+
if i == 0 {
216+
data.push_str(" - ");
217+
}
218+
else if i < begin.len() - 1 {
219+
data.push_str(" ");
220+
}
221+
}
141222

142-
TIME_DEPTH.with(|slot| slot.set(old));
223+
data.push_str(")");
143224

144-
rv
225+
print_time_passes_entry_internal(what, data);
226+
result
227+
} else {
228+
time_impl(what, f)
229+
}
145230
}
146231

147232
pub fn print_time_passes_entry(do_it: bool, what: &str, dur: Duration) {
@@ -155,12 +240,12 @@ pub fn print_time_passes_entry(do_it: bool, what: &str, dur: Duration) {
155240
r
156241
});
157242

158-
print_time_passes_entry_internal(what, dur);
243+
print_time_passes_entry_internal(what, duration_to_secs_str(dur));
159244

160245
TIME_DEPTH.with(|slot| slot.set(old));
161246
}
162247

163-
fn print_time_passes_entry_internal(what: &str, dur: Duration) {
248+
fn print_time_passes_entry_internal(what: &str, data: String) {
164249
let indentation = TIME_DEPTH.with(|slot| slot.get());
165250

166251
let mem_string = match get_resident() {
@@ -172,7 +257,7 @@ fn print_time_passes_entry_internal(what: &str, dur: Duration) {
172257
};
173258
println!("{}time: {}{}\t{}",
174259
repeat(" ").take(indentation).collect::<String>(),
175-
duration_to_secs_str(dur),
260+
data,
176261
mem_string,
177262
what);
178263
}

0 commit comments

Comments
 (0)