Skip to content

Commit b802554

Browse files
committed
Auto merge of rust-lang#137586 - nnethercote:SetImpliedBits, r=bjorn3
Speed up target feature computation

The LLVM backend calls `LLVMRustHasFeature` twice for every feature. In short-running rustc invocations, this accounts for a surprising amount of work.

r? `@bjorn3`
2 parents 454b083 + b2b94f4 commit b802554

File tree

2 files changed

+38
-32
lines changed

2 files changed

+38
-32
lines changed

src/gcc_util.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ pub(crate) fn global_gcc_features(sess: &Session, diagnostics: bool) -> Vec<Stri
4848
for feature in sess.opts.cg.target_feature.split(',') {
4949
if let Some(feature) = feature.strip_prefix('+') {
5050
all_rust_features.extend(
51-
UnordSet::from(sess.target.implied_target_features(std::iter::once(feature)))
51+
UnordSet::from(sess.target.implied_target_features(feature))
5252
.to_sorted_stable_ord()
5353
.iter()
5454
.map(|&&s| (true, s)),

src/lib.rs

Lines changed: 37 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -259,8 +259,8 @@ impl CodegenBackend for GccCodegenBackend {
259259
.join(sess)
260260
}
261261

262-
fn target_features_cfg(&self, sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
263-
target_features_cfg(sess, allow_unstable, &self.target_info)
262+
fn target_features_cfg(&self, sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) {
263+
target_features_cfg(sess, &self.target_info)
264264
}
265265
}
266266

@@ -486,35 +486,41 @@ fn to_gcc_opt_level(optlevel: Option<OptLevel>) -> OptimizationLevel {
486486
/// Returns the features that should be set in `cfg(target_feature)`.
487487
fn target_features_cfg(
488488
sess: &Session,
489-
allow_unstable: bool,
490489
target_info: &LockedTargetInfo,
491-
) -> Vec<Symbol> {
490+
) -> (Vec<Symbol>, Vec<Symbol>) {
492491
// TODO(antoyo): use global_gcc_features.
493-
sess.target
494-
.rust_target_features()
495-
.iter()
496-
.filter_map(|&(feature, gate, _)| {
497-
if allow_unstable
498-
|| (gate.in_cfg() && (sess.is_nightly_build() || gate.requires_nightly().is_none()))
499-
{
500-
Some(feature)
501-
} else {
502-
None
503-
}
504-
})
505-
.filter(|feature| {
506-
// TODO: we disable Neon for now since we don't support the LLVM intrinsics for it.
507-
if *feature == "neon" {
508-
return false;
509-
}
510-
target_info.cpu_supports(feature)
511-
/*
512-
adx, aes, avx, avx2, avx512bf16, avx512bitalg, avx512bw, avx512cd, avx512dq, avx512er, avx512f, avx512fp16, avx512ifma,
513-
avx512pf, avx512vbmi, avx512vbmi2, avx512vl, avx512vnni, avx512vp2intersect, avx512vpopcntdq,
514-
bmi1, bmi2, cmpxchg16b, ermsb, f16c, fma, fxsr, gfni, lzcnt, movbe, pclmulqdq, popcnt, rdrand, rdseed, rtm,
515-
sha, sse, sse2, sse3, sse4.1, sse4.2, sse4a, ssse3, tbm, vaes, vpclmulqdq, xsave, xsavec, xsaveopt, xsaves
516-
*/
517-
})
518-
.map(Symbol::intern)
519-
.collect()
492+
let f = |allow_unstable| {
493+
sess.target
494+
.rust_target_features()
495+
.iter()
496+
.filter_map(|&(feature, gate, _)| {
497+
if allow_unstable
498+
|| (gate.in_cfg()
499+
&& (sess.is_nightly_build() || gate.requires_nightly().is_none()))
500+
{
501+
Some(feature)
502+
} else {
503+
None
504+
}
505+
})
506+
.filter(|feature| {
507+
// TODO: we disable Neon for now since we don't support the LLVM intrinsics for it.
508+
if *feature == "neon" {
509+
return false;
510+
}
511+
target_info.cpu_supports(feature)
512+
/*
513+
adx, aes, avx, avx2, avx512bf16, avx512bitalg, avx512bw, avx512cd, avx512dq, avx512er, avx512f, avx512fp16, avx512ifma,
514+
avx512pf, avx512vbmi, avx512vbmi2, avx512vl, avx512vnni, avx512vp2intersect, avx512vpopcntdq,
515+
bmi1, bmi2, cmpxchg16b, ermsb, f16c, fma, fxsr, gfni, lzcnt, movbe, pclmulqdq, popcnt, rdrand, rdseed, rtm,
516+
sha, sse, sse2, sse3, sse4.1, sse4.2, sse4a, ssse3, tbm, vaes, vpclmulqdq, xsave, xsavec, xsaveopt, xsaves
517+
*/
518+
})
519+
.map(Symbol::intern)
520+
.collect()
521+
};
522+
523+
let target_features = f(false);
524+
let unstable_target_features = f(true);
525+
(target_features, unstable_target_features)
520526
}

0 commit comments

Comments
 (0)