Skip to content

Commit 0591065

Browse files
authored
Merge pull request #2020 from stan-dev/feature/flto-flags
Adds flags for compiler optimizations
2 parents 7ba6890 + 0ce5180 commit 0591065

File tree

4 files changed

+50
-22
lines changed

4 files changed

+50
-22
lines changed

make/compiler_flags

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,32 @@ CXX_MAJOR := $(shell $(CXX) -dumpversion 2>&1 | cut -d'.' -f1)
6262
CXX_MINOR := $(shell $(CXX) -dumpversion 2>&1 | cut -d'.' -f2)
6363

6464

65+
################################################################################
66+
# Set optional compiler flags for performance
67+
#
68+
# These flags are compiler and compiler version specific optimization flags.
69+
#
70+
# Because clang-6 and clang-7's -dumpversion reports version 4.2.1
71+
# and clang-6 needs either the llvm-ar or ar gold plugin to use link time
72+
# optimization (flto), flto is only turned on for clang versions greater than 8.
73+
# For gcc all versions after 4.9 can use the system `ar` and other utilities
74+
# when compiling with flto.
75+
#
76+
# FLAGS:
77+
# CXXFLAGS_OPTIM: Additional flags to CXXFLAGS
78+
# CPPFLAGS_OPTIM: Additional flags to CPPFLAGS
79+
# CXXFLAGS_OPTIM_TBB: Additional flags to TBB_CXXFLAGS
80+
# CXXFLAGS_OPTIM_SUNDIALS: Additional flags to CXXFLAGS_SUNDIALS
81+
# CPPFLAGS_OPTIM_SUNDIALS: Additional flags to CPPFLAGS_SUNDIALS
82+
# CXXFLAGS_FLTO: Additional flags for compiling with FLTO
83+
# CPPFLAGS_FLTO: Additional flags for compiling with FLTO
84+
# CXXFLAGS_FLTO_TBB: Additional flags for compiling tbb with flto
85+
# CXXFLAGS_FLTO_SUNDIALS: Additional flags for compiling c++ sundials with flto
86+
# CPPFLAGS_FLTO_SUNDIALS: Additional flags for compiling c sundials with flto
87+
# LDFLAGS_OPTIM: Adding optimization flags to LDFLAGS
88+
# LDFLAGS_FLTO: Adding flto options to LDFLAGS
89+
# LDFLAGS_MPI_FLTO: For adding flto options to MPI build
90+
##
6591

6692
################################################################################
6793
# Set default compiler flags
@@ -80,14 +106,14 @@ INC_GTEST ?= -I $(GTEST)/include -I $(GTEST)
80106

81107
## setup precompiler options
82108
CPPFLAGS_BOOST ?= -DBOOST_DISABLE_ASSERTS
83-
CPPFLAGS_SUNDIALS ?= -DNO_FPRINTF_OUTPUT
84-
CPPFLAGS_GTEST ?=
109+
# Preprocessor flags for SUNDIALS: pair the CPPFLAGS_* optimization and flto extras with CPPFLAGS_SUNDIALS.
CPPFLAGS_SUNDIALS ?= -DNO_FPRINTF_OUTPUT $(CPPFLAGS_OPTIM_SUNDIALS) $(CPPFLAGS_FLTO_SUNDIALS)
110+
#CPPFLAGS_GTEST ?=
85111

86112

87113
## setup compiler flags
88114
CXXFLAGS_LANG ?= -std=c++1y
89-
#CXXFLAGS_BOOST
90-
CXXFLAGS_SUNDIALS ?= -pipe
115+
#CXXFLAGS_BOOST ?=
116+
# Compiler flags for SUNDIALS: pair the CXXFLAGS_* optimization and flto extras with CXXFLAGS_SUNDIALS.
CXXFLAGS_SUNDIALS ?= -pipe $(CXXFLAGS_OPTIM_SUNDIALS) $(CXXFLAGS_FLTO_SUNDIALS)
91117
#CXXFLAGS_GTEST
92118

93119

@@ -224,7 +250,7 @@ endif
224250

225251

226252
CXXFLAGS_TBB ?= -I $(TBB)/include
227-
LDFLAGS_TBB ?= -Wl,-L,"$(TBB_BIN_ABSOLUTE_PATH)" -Wl,-rpath,"$(TBB_BIN_ABSOLUTE_PATH)"
253+
# Linker flags for the bundled TBB: library search path and rpath, plus optional flto/optimization extras.
# NOTE(review): LDFLAGS_FLTO_FLTO is not among the flags documented at the top of this file
# (only LDFLAGS_FLTO and LDFLAGS_MPI_FLTO are); the doubled suffix looks like a typo - confirm the intended name.
LDFLAGS_TBB ?= -Wl,-L,"$(TBB_BIN_ABSOLUTE_PATH)" -Wl,-rpath,"$(TBB_BIN_ABSOLUTE_PATH)" $(LDFLAGS_FLTO_FLTO) $(LDFLAGS_OPTIM_TBB)
228254
LDLIBS_TBB ?=
229255

230256
################################################################################
@@ -251,13 +277,13 @@ ifdef STAN_MPI
251277

252278
BOOST_LIBRARY_ABSOLUTE_PATH = $(abspath $(BOOST)/stage/lib)
253279

254-
LDFLAGS_MPI ?= -Wl,-L,"$(BOOST_LIBRARY_ABSOLUTE_PATH)" -Wl,-rpath,"$(BOOST_LIBRARY_ABSOLUTE_PATH)"
280+
# Linker flags for the MPI build: Boost library search path and rpath, plus optional flto/optimization extras.
# NOTE(review): MPI_OPTIM does not follow the <FAMILY>FLAGS_OPTIM[_<LIB>] naming used by the other
# optimization variables and is not listed in the documented flags - confirm whether LDFLAGS_OPTIM_MPI was intended.
LDFLAGS_MPI ?= -Wl,-L,"$(BOOST_LIBRARY_ABSOLUTE_PATH)" -Wl,-rpath,"$(BOOST_LIBRARY_ABSOLUTE_PATH)" $(LDFLAGS_MPI_FLTO) $(MPI_OPTIM)
255281
CXXFLAGS_MPI ?= -Wno-delete-non-virtual-dtor
256282
endif
257283

258-
CXXFLAGS += $(CXXFLAGS_LANG) $(CXXFLAGS_OS) $(CXXFLAGS_WARNINGS) $(CXXFLAGS_BOOST) $(CXXFLAGS_EIGEN) $(CXXFLAGS_OPENCL) $(CXXFLAGS_MPI) $(CXXFLAGS_THREADS) $(CXXFLAGS_TBB) -O$(O) $(INC)
259-
CPPFLAGS += $(CPPFLAGS_LANG) $(CPPFLAGS_OS) $(CPPFLAGS_WARNINGS) $(CPPFLAGS_BOOST) $(CPPFLAGS_EIGEN) $(CPPFLAGS_OPENCL) $(CPPFLAGS_MPI) $(CPPFLAGS_TBB)
260-
LDFLAGS += $(LDFLAGS_LANG) $(LDFLAGS_OS) $(LDFLAGS_WARNINGS) $(LDFLAGS_BOOST) $(LDFLAGS_EIGEN) $(LDFLAGS_OPENCL) $(LDFLAGS_MPI) $(LDFLAGS_TBB)
284+
CXXFLAGS += $(CXXFLAGS_LANG) $(CXXFLAGS_OS) $(CXXFLAGS_WARNINGS) $(CXXFLAGS_BOOST) $(CXXFLAGS_EIGEN) $(CXXFLAGS_OPENCL) $(CXXFLAGS_MPI) $(CXXFLAGS_THREADS) $(CXXFLAGS_TBB) $(CXXFLAGS_FLTO) $(CXXFLAGS_OPTIM) -O$(O) $(INC)
285+
CPPFLAGS += $(CPPFLAGS_LANG) $(CPPFLAGS_OS) $(CPPFLAGS_WARNINGS) $(CPPFLAGS_BOOST) $(CPPFLAGS_EIGEN) $(CPPFLAGS_OPENCL) $(CPPFLAGS_MPI) $(CPPFLAGS_TBB) $(CPPFLAGS_FLTO) $(CPPFLAGS_OPTIM)
286+
LDFLAGS += $(LDFLAGS_LANG) $(LDFLAGS_OS) $(LDFLAGS_WARNINGS) $(LDFLAGS_BOOST) $(LDFLAGS_EIGEN) $(LDFLAGS_OPENCL) $(LDFLAGS_MPI) $(LDFLAGS_TBB) $(LDFLAGS_FLTO) $(LDFLAGS_OPTIM)
261287
LDLIBS += $(LDLIBS_LANG) $(LDLIBS_OS) $(LDLIBS_WARNINGS) $(LDLIBS_BOOST) $(LDLIBS_EIGEN) $(LDLIBS_OPENCL) $(LDLIBS_MPI) $(LDLIBS_TBB)
262288

263289
.PHONY: print-compiler-flags
@@ -280,15 +306,20 @@ print-compiler-flags:
280306
@echo ' Compiler flags (each can be overriden separately):'
281307
@echo ' - CXXFLAGS_LANG ' $(CXXFLAGS_LANG)
282308
@echo ' - CXXFLAGS_WARNINGS ' $(CXXFLAGS_WARNINGS)
309+
@echo ' - CXXFLAGS_OPTIM ' $(CXXFLAGS_OPTIM)
310+
@echo ' - CXXFLAGS_FLTO ' $(CXXFLAGS_FLTO)
283311
@echo ' - CXXFLAGS_BOOST ' $(CXXFLAGS_BOOST)
284312
@echo ' - CXXFLAGS_EIGEN ' $(CXXFLAGS_EIGEN)
285313
@echo ' - CXXFLAGS_OS ' $(CXXFLAGS_OS)
286314
@echo ' - CXXFLAGS_GTEST ' $(CXXFLAGS_GTEST)
287315
@echo ' - CXXFLAGS_THREADS ' $(CXXFLAGS_THREADS)
288316
@echo ' - CXXFLAGS_OPENCL ' $(CXXFLAGS_OPENCL)
289317
@echo ' - CXXFLAGS_TBB ' $(CXXFLAGS_TBB)
318+
@echo ' - CXXFLAGS_OPTIM_TBB ' $(CXXFLAGS_OPTIM_TBB)
290319
@echo ' - CXXFLAGS_MPI ' $(CXXFLAGS_MPI)
291320
@echo ' - CFLAGS_SUNDIALS ' $(CFLAGS_SUNDIALS)
321+
@echo ' - CXXFLAGS_SUNDIALS ' $(CXXFLAGS_SUNDIALS)
322+
@echo ' - CXXFLAGS_OPTIM_SUNDIALS ' $(CXXFLAGS_OPTIM_SUNDIALS)
292323
@echo ' LDLIBS:'
293324
@echo ' - LDLIBS_LANG ' $(LDLIBS_LANG)
294325
@echo ' - LDLIBS_WARNINGS ' $(LDLIBS_WARNINGS)

make/libraries

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ SUNDIALS_KINSOL := $(patsubst %.c,%.o, \
5151
SUNDIALS_NVECSERIAL := $(patsubst %.c,%.o,\
5252
$(addprefix $(SUNDIALS)/src/, nvector/serial/nvector_serial.c sundials/sundials_math.c))
5353

54-
$(sort $(SUNDIALS_CVODES) $(SUNDIALS_IDAS) $(SUNDIALS_KINSOL) $(SUNDIALS_NVECSERIAL)) : CXXFLAGS = $(CXXFLAGS_SUNDIALS) $(CXXFLAGS_OS) -O$(O) $(INC_SUNDIALS)
55-
$(sort $(SUNDIALS_CVODES) $(SUNDIALS_IDAS) $(SUNDIALS_KINSOL) $(SUNDIALS_NVECSERIAL)) : CPPFLAGS = $(CPPFLAGS_SUNDIALS) $(CPPFLAGS_OS)
54+
$(sort $(SUNDIALS_CVODES) $(SUNDIALS_IDAS) $(SUNDIALS_KINSOL) $(SUNDIALS_NVECSERIAL)) : CXXFLAGS = $(CXXFLAGS_SUNDIALS) $(CXXFLAGS_OS) $(CXXFLAGS_OPTIM_SUNDIALS) -O$(O) $(INC_SUNDIALS)
55+
$(sort $(SUNDIALS_CVODES) $(SUNDIALS_IDAS) $(SUNDIALS_KINSOL) $(SUNDIALS_NVECSERIAL)) : CPPFLAGS = $(CPPFLAGS_SUNDIALS) $(CPPFLAGS_OS) $(CPPFLAGS_OPTIM_SUNDIALS) -O$(O)
5656
$(sort $(SUNDIALS_CVODES) $(SUNDIALS_IDAS) $(SUNDIALS_KINSOL) $(SUNDIALS_NVECSERIAL)) : %.o : %.c
5757
@mkdir -p $(dir $@)
5858
$(COMPILE.cpp) -x c -include $(SUNDIALS)/include/stan_sundials_printf_override.hpp $< $(OUTPUT_OPTION)
@@ -114,11 +114,11 @@ TBB_CXX_TYPE ?= $(CXX_TYPE)
114114
# Set c compiler used for the TBB
115115
ifeq (clang,$(CXX_TYPE))
116116
TBB_CC ?= $(subst clang++,clang,$(CXX))
117-
TBB_CXXFLAGS ?= -Wno-unknown-warning-option -Wno-deprecated-copy
117+
TBB_CXXFLAGS ?= -Wno-unknown-warning-option -Wno-deprecated-copy $(CXXFLAGS_OPTIM_TBB) $(CXXFLAGS_FLTO_TBB)
118118
endif
119119
ifeq (gcc,$(CXX_TYPE))
120120
TBB_CC ?= $(subst g++,gcc,$(CXX))
121-
TBB_CXXFLAGS ?= -Wno-unknown-warning-option -Wno-missing-attributes -Wno-class-memaccess -Wno-sized-deallocation
121+
TBB_CXXFLAGS ?= -Wno-unknown-warning-option -Wno-missing-attributes -Wno-class-memaccess -Wno-sized-deallocation $(CXXFLAGS_OPTIM_TBB) $(CXXFLAGS_FLTO_TBB)
122122
endif
123123
TBB_CC ?= $(CC)
124124

stan/math/rev/core/vari.hpp

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ class vari_base {
3030
*/
3131
virtual void chain() = 0;
3232
virtual void set_zero_adjoint() = 0;
33-
virtual ~vari_base() noexcept {}
3433

3534
/**
3635
* Allocate memory from the underlying memory pool. This memory is
@@ -104,7 +103,7 @@ class vari_value<T, require_floating_point_t<T>> : public vari_base {
104103
*/
105104
template <typename S, require_convertible_t<S&, T>* = nullptr>
106105
vari_value(S x) noexcept : val_(x), adj_(0.0) { // NOLINT
107-
ChainableStack::instance_->var_stack_.emplace_back(this);
106+
ChainableStack::instance_->var_stack_.push_back(this);
108107
}
109108

110109
/**
@@ -125,14 +124,12 @@ class vari_value<T, require_floating_point_t<T>> : public vari_base {
125124
template <typename S, require_convertible_t<S&, T>* = nullptr>
126125
vari_value(S x, bool stacked) noexcept : val_(x), adj_(0.0) {
127126
if (stacked) {
128-
ChainableStack::instance_->var_stack_.emplace_back(this);
127+
ChainableStack::instance_->var_stack_.push_back(this);
129128
} else {
130-
ChainableStack::instance_->var_nochain_stack_.emplace_back(this);
129+
ChainableStack::instance_->var_nochain_stack_.push_back(this);
131130
}
132131
}
133132

134-
~vari_value() = default;
135-
136133
inline void chain() {}
137134

138135
/**
@@ -272,9 +269,9 @@ class vari_value<T, require_eigen_dense_base_t<T>> : public vari_base {
272269
val_(eigen_map(val_mem_, x.rows(), x.cols()) = x),
273270
adj_(eigen_map(adj_mem_, x.rows(), x.cols()).setZero()) {
274271
if (stacked) {
275-
ChainableStack::instance_->var_stack_.emplace_back(this);
272+
ChainableStack::instance_->var_stack_.push_back(this);
276273
} else {
277-
ChainableStack::instance_->var_nochain_stack_.emplace_back(this);
274+
ChainableStack::instance_->var_nochain_stack_.push_back(this);
278275
}
279276
}
280277

stan/math/rev/fun/log.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ namespace stan {
1717
namespace math {
1818

1919
namespace internal {
20-
class log_vari : public op_v_vari {
20+
class log_vari final : public op_v_vari {
2121
public:
2222
explicit log_vari(vari* avi) : op_v_vari(std::log(avi->val_), avi) {}
2323
void chain() { avi_->adj_ += adj_ / avi_->val_; }

0 commit comments

Comments
 (0)