Skip to content

Adds flags for compiler optimizations #2020

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 33 commits into from
Aug 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
5fa5bf9
turn on flto everywhere
SteveBronder Aug 11, 2020
2dba173
Let everything compile with same optimization level
SteveBronder Aug 11, 2020
a3045c0
Let everything compile with same optimization level
SteveBronder Aug 11, 2020
18b7d8a
remove flto for gcc
SteveBronder Aug 11, 2020
efdde69
use linker plugin
SteveBronder Aug 11, 2020
80a0550
only use flto for gcc>9
SteveBronder Aug 11, 2020
2c37c41
use llvm-ar version for clang++
SteveBronder Aug 11, 2020
2064049
Set default visibility to hidden
SteveBronder Aug 11, 2020
a5a399a
Set default visibility for stan final program to hidden
SteveBronder Aug 11, 2020
ef3da99
turn off visiblity=hidden stuff
SteveBronder Aug 11, 2020
9ce6ced
Add gcc specific compiler options
SteveBronder Aug 11, 2020
d6f96b9
turn off visiblity=hidden stuff
SteveBronder Aug 11, 2020
d026177
simpler gcc flags
SteveBronder Aug 11, 2020
a211d85
simpler gcc flags
SteveBronder Aug 11, 2020
cae120d
simpler gcc flags
SteveBronder Aug 11, 2020
57f124d
move new and delete
SteveBronder Aug 11, 2020
426f92c
move new and delete
SteveBronder Aug 11, 2020
5dc547e
remove stack reserve
SteveBronder Aug 12, 2020
cdea742
update flags
SteveBronder Aug 13, 2020
490a274
update to have allow custom flags for sundials and tbb
SteveBronder Aug 13, 2020
6e35d5e
update to have allow custom flags for sundials and tbb
SteveBronder Aug 13, 2020
7e1ed32
update to have allow custom flags for sundials and tbb
SteveBronder Aug 14, 2020
eab4240
update to have allow custom flags for sundials and tbb
SteveBronder Aug 14, 2020
0dbd658
add sundials optims
SteveBronder Aug 14, 2020
aa3f147
final version of compiler flags
SteveBronder Aug 18, 2020
dbd5e7d
merge to develop
SteveBronder Aug 18, 2020
28b0266
use gold linker instead of explicit llvm linker
SteveBronder Aug 18, 2020
b94684b
use ld flag
SteveBronder Aug 18, 2020
b1e985a
doc optimization compiler flags
SteveBronder Aug 18, 2020
6aaee23
move optimization flags for gcc so they are only turned on for compil…
SteveBronder Aug 18, 2020
3ead9cb
remove jobserver value for flto flag
SteveBronder Aug 18, 2020
506b98a
only turn on flto for gcc>8
SteveBronder Aug 20, 2020
0ce5180
Remove setting optim flags and fix docs for new makefile variables
SteveBronder Aug 21, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 40 additions & 9 deletions make/compiler_flags
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,32 @@ CXX_MAJOR := $(shell $(CXX) -dumpversion 2>&1 | cut -d'.' -f1)
CXX_MINOR := $(shell $(CXX) -dumpversion 2>&1 | cut -d'.' -f2)


################################################################################
# Set optional compiler flags for performance
#
# These flags are compiler and compiler version specific optimization flags.
#
# Because -dumpversion reports version 4.2.1 for both clang-6 and clang-7,
# and clang-6 needs either llvm-ar or the ar gold plugin to use link time
# optimization (flto), flto is only turned on for clang versions greater than 8.
# For gcc, all versions after 4.9 can use the system `ar` and other utilities
# when compiling with flto.
#
# FLAGS:
# CXXFLAGS_OPTIM: Additional flags to CXXFLAGS
# CPPFLAGS_OPTIM: Additional flags to CPPFLAGS
# CXXFLAGS_OPTIM_TBB: Additional flags to TBB_CXXFLAGS
# CXXFLAGS_OPTIM_SUNDIALS: Additional flags to CXXFLAGS_SUNDIALS
# CPPFLAGS_OPTIM_SUNDIALS: Additional flags to CPPFLAGS_SUNDIALS
# CXXFLAGS_FLTO: Additional flags for compiling with FLTO
# CPPFLAGS_FLTO: Additional flags for compiling with FLTO
# CXXFLAGS_FLTO_TBB: Additional flags for compiling tbb with flto
# CXXFLAGS_FLTO_SUNDIALS: Additional flags for compiling c++ sundials with flto
# CPPFLAGS_FLTO_SUNDIALS: Additional flags for compiling c sundials with flto
# LDFLAGS_OPTIM: Adding optimization flags to LDFLAGS
# LDFLAGS_FLTO: Adding flto options to LDFLAGS
# LDFLAGS_MPI_FLTO: For adding flto options to MPI build
##

################################################################################
# Set default compiler flags
Expand All @@ -80,14 +106,14 @@ INC_GTEST ?= -I $(GTEST)/include -I $(GTEST)

## setup precompiler options
CPPFLAGS_BOOST ?= -DBOOST_DISABLE_ASSERTS
CPPFLAGS_SUNDIALS ?= -DNO_FPRINTF_OUTPUT
CPPFLAGS_GTEST ?=
CPPFLAGS_SUNDIALS ?= -DNO_FPRINTF_OUTPUT $(CPPFLAGS_OPTIM_SUNDIALS) $(CXXFLAGS_FLTO_SUNDIALS)
#CPPFLAGS_GTEST ?=


## setup compiler flags
CXXFLAGS_LANG ?= -std=c++1y
#CXXFLAGS_BOOST
CXXFLAGS_SUNDIALS ?= -pipe
#CXXFLAGS_BOOST ?=
CXXFLAGS_SUNDIALS ?= -pipe $(CXXFLAGS_OPTIM_SUNDIALS) $(CPPFLAGS_FLTO_SUNDIALS)
#CXXFLAGS_GTEST


Expand Down Expand Up @@ -224,7 +250,7 @@ endif


CXXFLAGS_TBB ?= -I $(TBB)/include
LDFLAGS_TBB ?= -Wl,-L,"$(TBB_BIN_ABSOLUTE_PATH)" -Wl,-rpath,"$(TBB_BIN_ABSOLUTE_PATH)"
LDFLAGS_TBB ?= -Wl,-L,"$(TBB_BIN_ABSOLUTE_PATH)" -Wl,-rpath,"$(TBB_BIN_ABSOLUTE_PATH)" $(LDFLAGS_FLTO_FLTO) $(LDFLAGS_OPTIM_TBB)
LDLIBS_TBB ?=

################################################################################
Expand All @@ -251,13 +277,13 @@ ifdef STAN_MPI

BOOST_LIBRARY_ABSOLUTE_PATH = $(abspath $(BOOST)/stage/lib)

LDFLAGS_MPI ?= -Wl,-L,"$(BOOST_LIBRARY_ABSOLUTE_PATH)" -Wl,-rpath,"$(BOOST_LIBRARY_ABSOLUTE_PATH)"
LDFLAGS_MPI ?= -Wl,-L,"$(BOOST_LIBRARY_ABSOLUTE_PATH)" -Wl,-rpath,"$(BOOST_LIBRARY_ABSOLUTE_PATH)" $(LDFLAGS_MPI_FLTO) $(MPI_OPTIM)
CXXFLAGS_MPI ?= -Wno-delete-non-virtual-dtor
endif

CXXFLAGS += $(CXXFLAGS_LANG) $(CXXFLAGS_OS) $(CXXFLAGS_WARNINGS) $(CXXFLAGS_BOOST) $(CXXFLAGS_EIGEN) $(CXXFLAGS_OPENCL) $(CXXFLAGS_MPI) $(CXXFLAGS_THREADS) $(CXXFLAGS_TBB) -O$(O) $(INC)
CPPFLAGS += $(CPPFLAGS_LANG) $(CPPFLAGS_OS) $(CPPFLAGS_WARNINGS) $(CPPFLAGS_BOOST) $(CPPFLAGS_EIGEN) $(CPPFLAGS_OPENCL) $(CPPFLAGS_MPI) $(CPPFLAGS_TBB)
LDFLAGS += $(LDFLAGS_LANG) $(LDFLAGS_OS) $(LDFLAGS_WARNINGS) $(LDFLAGS_BOOST) $(LDFLAGS_EIGEN) $(LDFLAGS_OPENCL) $(LDFLAGS_MPI) $(LDFLAGS_TBB)
CXXFLAGS += $(CXXFLAGS_LANG) $(CXXFLAGS_OS) $(CXXFLAGS_WARNINGS) $(CXXFLAGS_BOOST) $(CXXFLAGS_EIGEN) $(CXXFLAGS_OPENCL) $(CXXFLAGS_MPI) $(CXXFLAGS_THREADS) $(CXXFLAGS_TBB) $(CXXFLAGS_FLTO) $(CXXFLAGS_OPTIM) -O$(O) $(INC)
CPPFLAGS += $(CPPFLAGS_LANG) $(CPPFLAGS_OS) $(CPPFLAGS_WARNINGS) $(CPPFLAGS_BOOST) $(CPPFLAGS_EIGEN) $(CPPFLAGS_OPENCL) $(CPPFLAGS_MPI) $(CPPFLAGS_TBB) $(CPPFLAGS_FLTO) $(CPPFLAGS_OPTIM)
LDFLAGS += $(LDFLAGS_LANG) $(LDFLAGS_OS) $(LDFLAGS_WARNINGS) $(LDFLAGS_BOOST) $(LDFLAGS_EIGEN) $(LDFLAGS_OPENCL) $(LDFLAGS_MPI) $(LDFLAGS_TBB) $(LDFLAGS_FLTO) $(LDFLAGS_OPTIM)
LDLIBS += $(LDLIBS_LANG) $(LDLIBS_OS) $(LDLIBS_WARNINGS) $(LDLIBS_BOOST) $(LDLIBS_EIGEN) $(LDLIBS_OPENCL) $(LDLIBS_MPI) $(LDLIBS_TBB)

.PHONY: print-compiler-flags
Expand All @@ -280,15 +306,20 @@ print-compiler-flags:
@echo ' Compiler flags (each can be overriden separately):'
@echo ' - CXXFLAGS_LANG ' $(CXXFLAGS_LANG)
@echo ' - CXXFLAGS_WARNINGS ' $(CXXFLAGS_WARNINGS)
@echo ' - CXXFLAGS_OPTIM ' $(CXXFLAGS_OPTIM)
@echo ' - CXXFLAGS_FLTO ' $(CXXFLAGS_FLTO)
@echo ' - CXXFLAGS_BOOST ' $(CXXFLAGS_BOOST)
@echo ' - CXXFLAGS_EIGEN ' $(CXXFLAGS_EIGEN)
@echo ' - CXXFLAGS_OS ' $(CXXFLAGS_OS)
@echo ' - CXXFLAGS_GTEST ' $(CXXFLAGS_GTEST)
@echo ' - CXXFLAGS_THREADS ' $(CXXFLAGS_THREADS)
@echo ' - CXXFLAGS_OPENCL ' $(CXXFLAGS_OPENCL)
@echo ' - CXXFLAGS_TBB ' $(CXXFLAGS_TBB)
@echo ' - CXXFLAGS_OPTIM_TBB ' $(CXXFLAGS_OPTIM_TBB)
@echo ' - CXXFLAGS_MPI ' $(CXXFLAGS_MPI)
@echo ' - CFLAGS_SUNDIALS ' $(CFLAGS_SUNDIALS)
@echo ' - CXXFLAGS_SUNDIALS ' $(CXXFLAGS_SUNDIALS)
@echo ' - CXXFLAGS_OPTIM_SUNDIALS ' $(CXXFLAGS_OPTIM_SUNDIALS)
@echo ' LDLIBS:'
@echo ' - LDLIBS_LANG ' $(LDLIBS_LANG)
@echo ' - LDLIBS_WARNINGS ' $(LDLIBS_WARNINGS)
Expand Down
8 changes: 4 additions & 4 deletions make/libraries
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ SUNDIALS_KINSOL := $(patsubst %.c,%.o, \
SUNDIALS_NVECSERIAL := $(patsubst %.c,%.o,\
$(addprefix $(SUNDIALS)/src/, nvector/serial/nvector_serial.c sundials/sundials_math.c))

$(sort $(SUNDIALS_CVODES) $(SUNDIALS_IDAS) $(SUNDIALS_KINSOL) $(SUNDIALS_NVECSERIAL)) : CXXFLAGS = $(CXXFLAGS_SUNDIALS) $(CXXFLAGS_OS) -O$(O) $(INC_SUNDIALS)
$(sort $(SUNDIALS_CVODES) $(SUNDIALS_IDAS) $(SUNDIALS_KINSOL) $(SUNDIALS_NVECSERIAL)) : CPPFLAGS = $(CPPFLAGS_SUNDIALS) $(CPPFLAGS_OS)
$(sort $(SUNDIALS_CVODES) $(SUNDIALS_IDAS) $(SUNDIALS_KINSOL) $(SUNDIALS_NVECSERIAL)) : CXXFLAGS = $(CXXFLAGS_SUNDIALS) $(CXXFLAGS_OS) $(CXXFLAGS_OPTIM_SUNDIALS) -O$(O) $(INC_SUNDIALS)
$(sort $(SUNDIALS_CVODES) $(SUNDIALS_IDAS) $(SUNDIALS_KINSOL) $(SUNDIALS_NVECSERIAL)) : CPPFLAGS = $(CPPFLAGS_SUNDIALS) $(CPPFLAGS_OS) $(CPPFLAGS_OPTIM_SUNDIALS) -O$(O)
$(sort $(SUNDIALS_CVODES) $(SUNDIALS_IDAS) $(SUNDIALS_KINSOL) $(SUNDIALS_NVECSERIAL)) : %.o : %.c
@mkdir -p $(dir $@)
$(COMPILE.cpp) -x c -include $(SUNDIALS)/include/stan_sundials_printf_override.hpp $< $(OUTPUT_OPTION)
Expand Down Expand Up @@ -114,11 +114,11 @@ TBB_CXX_TYPE ?= $(CXX_TYPE)
# Set c compiler used for the TBB
ifeq (clang,$(CXX_TYPE))
TBB_CC ?= $(subst clang++,clang,$(CXX))
TBB_CXXFLAGS ?= -Wno-unknown-warning-option -Wno-deprecated-copy
TBB_CXXFLAGS ?= -Wno-unknown-warning-option -Wno-deprecated-copy $(CXXFLAGS_OPTIM_TBB) $(CXXFLAGS_FLTO_TBB)
endif
ifeq (gcc,$(CXX_TYPE))
TBB_CC ?= $(subst g++,gcc,$(CXX))
TBB_CXXFLAGS ?= -Wno-unknown-warning-option -Wno-missing-attributes -Wno-class-memaccess -Wno-sized-deallocation
TBB_CXXFLAGS ?= -Wno-unknown-warning-option -Wno-missing-attributes -Wno-class-memaccess -Wno-sized-deallocation $(CXXFLAGS_OPTIM_TBB) $(CXXFLAGS_FLTO_TBB)
endif
TBB_CC ?= $(CC)

Expand Down
13 changes: 5 additions & 8 deletions stan/math/rev/core/vari.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ class vari_base {
*/
virtual void chain() = 0;
virtual void set_zero_adjoint() = 0;
virtual ~vari_base() noexcept {}

/**
* Allocate memory from the underlying memory pool. This memory is
Expand Down Expand Up @@ -104,7 +103,7 @@ class vari_value<T, require_floating_point_t<T>> : public vari_base {
*/
template <typename S, require_convertible_t<S&, T>* = nullptr>
vari_value(S x) noexcept : val_(x), adj_(0.0) { // NOLINT
ChainableStack::instance_->var_stack_.emplace_back(this);
ChainableStack::instance_->var_stack_.push_back(this);
}

/**
Expand All @@ -125,14 +124,12 @@ class vari_value<T, require_floating_point_t<T>> : public vari_base {
template <typename S, require_convertible_t<S&, T>* = nullptr>
vari_value(S x, bool stacked) noexcept : val_(x), adj_(0.0) {
if (stacked) {
ChainableStack::instance_->var_stack_.emplace_back(this);
ChainableStack::instance_->var_stack_.push_back(this);
} else {
ChainableStack::instance_->var_nochain_stack_.emplace_back(this);
ChainableStack::instance_->var_nochain_stack_.push_back(this);
}
}

~vari_value() = default;

inline void chain() {}

/**
Expand Down Expand Up @@ -272,9 +269,9 @@ class vari_value<T, require_eigen_dense_base_t<T>> : public vari_base {
val_(eigen_map(val_mem_, x.rows(), x.cols()) = x),
adj_(eigen_map(adj_mem_, x.rows(), x.cols()).setZero()) {
if (stacked) {
ChainableStack::instance_->var_stack_.emplace_back(this);
ChainableStack::instance_->var_stack_.push_back(this);
} else {
ChainableStack::instance_->var_nochain_stack_.emplace_back(this);
ChainableStack::instance_->var_nochain_stack_.push_back(this);
}
}

Expand Down
2 changes: 1 addition & 1 deletion stan/math/rev/fun/log.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ namespace stan {
namespace math {

namespace internal {
class log_vari : public op_v_vari {
class log_vari final : public op_v_vari {
public:
explicit log_vari(vari* avi) : op_v_vari(std::log(avi->val_), avi) {}
void chain() { avi_->adj_ += adj_ / avi_->val_; }
Expand Down