@@ -130,18 +130,103 @@ pub fn time<T, F>(do_it: bool, what: &str, f: F) -> T where
130
130
if cfg ! ( debug_assertions) {
131
131
profq_msg ( ProfileQueriesMsg :: TimeBegin ( what. to_string ( ) ) )
132
132
} ;
133
+
134
+ #[ cfg( not( all( windows, parallel_queries, any( target_arch = "x86" , target_arch = "x86_64" ) ) ) ) ]
135
+ let rv = time_impl ( what, f) ;
136
+ #[ cfg( all( windows, parallel_queries, any( target_arch = "x86" , target_arch = "x86_64" ) ) ) ]
137
+ let rv = time_threads_impl ( what, f) ;
138
+
139
+ TIME_DEPTH . with ( |slot| slot. set ( old) ) ;
140
+
141
+ rv
142
+ }
143
+
144
+ fn time_impl < T , F > ( what : & str , f : F ) -> T where
145
+ F : FnOnce ( ) -> T ,
146
+ {
133
147
let start = Instant :: now ( ) ;
134
148
let rv = f ( ) ;
135
149
let dur = start. elapsed ( ) ;
136
150
if cfg ! ( debug_assertions) {
137
151
profq_msg ( ProfileQueriesMsg :: TimeEnd )
138
152
} ;
153
+ print_time_passes_entry_internal ( what, duration_to_secs_str ( dur) ) ;
154
+ rv
155
+ }
139
156
140
- print_time_passes_entry_internal ( what, dur) ;
157
/// Variant of `time_impl` that additionally reports per-thread CPU usage.
///
/// Only compiled on Windows x86/x86_64 builds with `parallel_queries`. When a
/// rayon registry is current, the cycle counters of the calling thread and of
/// every thread handle stored in the registry are sampled before and after
/// `f` runs. The printed entry has the form
///
/// `<secs> - cores <n>x - cpu <p>% - threads (<caller>x - <w1>x <w2>x ...)`
///
/// where each `x` figure is a thread's consumed cycles divided by the
/// time-stamp-counter cycles that elapsed while `f` ran. If no registry is
/// current, this falls back to `time_impl`.
///
/// Returns whatever `f` returns. Panics if `QueryThreadCycleTime` fails.
#[cfg(all(windows, parallel_queries, any(target_arch = "x86", target_arch = "x86_64")))]
fn time_threads_impl<T, F>(what: &str, f: F) -> T where
    F: FnOnce() -> T,
{
    use rayon_core::registry;
    use std::iter;
    use x86;
    use winapi;
    use kernel32;

    let registry = match registry::get_current_registry() {
        Some(registry) => registry,
        None => return time_impl(what, f),
    };

    // Index 0 is always the calling thread; the registry's handles follow.
    let threads: Vec<_> = {
        let workers = registry.handles.lock();
        // SAFETY: GetCurrentThread takes no arguments and returns a
        // pseudo-handle for the calling thread.
        let current = unsafe {
            iter::once(kernel32::GetCurrentThread())
        };
        current.chain(workers.iter().map(|t| t.0)).collect()
    };

    let mut begin = vec![0u64; threads.len()];
    let mut end = vec![0u64; threads.len()];

    for (&handle, cycles) in threads.iter().zip(begin.iter_mut()) {
        // SAFETY: `cycles` points at a live u64.
        // NOTE(review): assumes the registry keeps these handles open for the
        // duration of this call -- confirm.
        unsafe {
            assert!(kernel32::QueryThreadCycleTime(handle, cycles) == winapi::TRUE);
        }
    }

    // Wall-clock seconds come from `Instant`. The performance-counter
    // frequency is not the time-stamp-counter frequency, so dividing an
    // `rdtsc` delta by it does not yield seconds. The `rdtsc` delta is used
    // only as the denominator of the usage ratios below.
    let wall_start = Instant::now();
    let cycles_start = unsafe { x86::shared::time::rdtsc() };
    let result = f();
    let cycles_end = unsafe { x86::shared::time::rdtsc() };
    let wall = wall_start.elapsed();

    for (&handle, cycles) in threads.iter().zip(end.iter_mut()) {
        // SAFETY: same reasoning as for the first sampling loop.
        unsafe {
            assert!(kernel32::QueryThreadCycleTime(handle, cycles) == winapi::TRUE);
        }
    }

    if cfg!(debug_assertions) {
        profq_msg(ProfileQueriesMsg::TimeEnd)
    };

    let elapsed_cycles = cycles_end - cycles_start;
    let time_secs = wall.as_secs() as f64 + wall.subsec_nanos() as f64 / 1_000_000_000.0;

    let thread_times: Vec<u64> = end.iter().zip(begin.iter()).map(|(e, b)| *e - *b).collect();
    let total_thread_time: u64 = thread_times.iter().cloned().sum();
    let core_usage = total_thread_time as f64 / elapsed_cycles as f64;

    // `threads` always contains the calling thread, so this cannot underflow.
    let worker_count = thread_times.len() - 1;
    // With no worker handles, divide by one thread instead of by zero.
    let cpu_divisor = if worker_count == 0 { 1.0 } else { worker_count as f64 };

    let ratios: Vec<String> = thread_times
        .iter()
        .map(|&cycles| format!("{:.2}x", cycles as f64 / elapsed_cycles as f64))
        .collect();

    let mut data = format!("{:.3} - cores {:.2}x - cpu {:.2}% - threads ({}",
                           time_secs,
                           core_usage,
                           core_usage * 100.0 / cpu_divisor,
                           ratios[0]);
    // The calling thread is set apart by " - "; workers are space-separated.
    if worker_count > 0 {
        data.push_str(" - ");
        data.push_str(&ratios[1..].join(" "));
    }
    data.push_str(")");

    print_time_passes_entry_internal(what, data);
    result
}
146
231
147
232
pub fn print_time_passes_entry ( do_it : bool , what : & str , dur : Duration ) {
@@ -155,12 +240,12 @@ pub fn print_time_passes_entry(do_it: bool, what: &str, dur: Duration) {
155
240
r
156
241
} ) ;
157
242
158
- print_time_passes_entry_internal ( what, dur) ;
243
+ print_time_passes_entry_internal ( what, duration_to_secs_str ( dur) ) ;
159
244
160
245
TIME_DEPTH . with ( |slot| slot. set ( old) ) ;
161
246
}
162
247
163
- fn print_time_passes_entry_internal ( what : & str , dur : Duration ) {
248
+ fn print_time_passes_entry_internal ( what : & str , data : String ) {
164
249
let indentation = TIME_DEPTH . with ( |slot| slot. get ( ) ) ;
165
250
166
251
let mem_string = match get_resident ( ) {
@@ -172,7 +257,7 @@ fn print_time_passes_entry_internal(what: &str, dur: Duration) {
172
257
} ;
173
258
println ! ( "{}time: {}{}\t {}" ,
174
259
repeat( " " ) . take( indentation) . collect:: <String >( ) ,
175
- duration_to_secs_str ( dur ) ,
260
+ data ,
176
261
mem_string,
177
262
what) ;
178
263
}
0 commit comments