Skip to content

Commit dbe4ceb

Browse files
authored
Merge pull request #3932 from QMCPACK/rc_3140
Rc_3140
2 parents 5f4ba01 + 801bead commit dbe4ceb

File tree

386 files changed

+8864
-4470
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

386 files changed

+8864
-4470
lines changed

CHANGELOG.md

+23
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,29 @@
22

33
Notable changes to QMCPACK are documented in this file.
44

5+
## [3.14.0] - 2022-04-06
6+
7+
This release focuses on performance improvements to the OpenMP target offload version for GPUs as well as ongoing minor
8+
improvements. The new GPU implementation rivals the legacy CUDA version in performance for a broad range of problems
9+
while offering more functionality, such as three-body Jastrow functions. Developers are very interested in feedback from
10+
users about the new version and will prioritize developments based on comments received. A new driver\_version switch is
11+
introduced, currently optional, to disambiguate between the versions and their inputs.
12+
13+
- New global driver\_version switch to select between batched and legacy codes. This will become a required input tag in the next major release series of QMCPACK, but remains optional in 3.x versions [\#3897](https://github.com/QMCPACK/qmcpack/pull/3897)
14+
- Optimization of block sizes in GPU offload kernels [\#3910](https://github.com/QMCPACK/qmcpack/pull/3910)
15+
- GPU Offload of one-body Jastrow ratio calculation in pseudopotential evaluation [\#3905](https://github.com/QMCPACK/qmcpack/pull/3905)
16+
- GPU Offload of some Coulomb potential evaluations [\#3842](https://github.com/QMCPACK/qmcpack/pull/3842)
17+
- Partial GPU offload of multideterminant evaluation, e.g., [\#3892](https://github.com/QMCPACK/qmcpack/pull/3892)
18+
- Increased performance via more selective distance table computation [\#3846](https://github.com/QMCPACK/qmcpack/pull/3846)
19+
- Improved performance on AMD GPUs via rocSOLVER integration [\#3756](https://github.com/QMCPACK/qmcpack/issues/3756)
20+
- HIP build options shown in output [\#3919](https://github.com/QMCPACK/qmcpack/pull/3919)
21+
- Documentation improvements, particularly relating to installation.
22+
- Various bug fixes and ongoing cleanup.
23+
24+
### NEXUS
25+
26+
- Nexus: proper use of max\_seconds in legacy drivers [\#3877](https://github.com/QMCPACK/qmcpack/pull/3877)
27+
528
## [3.13.0] - 2022-02-16
629

730
### Notes

CMake/ClangCompilers.cmake

+12-7
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ if(QMC_OMP)
1616
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
1717

1818
if(ENABLE_OFFLOAD)
19-
if (QMC_CUDA2HIP)
19+
if(QMC_CUDA2HIP)
2020
set(OFFLOAD_TARGET_DEFAULT "amdgcn-amd-amdhsa")
2121
else()
2222
set(OFFLOAD_TARGET_DEFAULT "nvptx64-nvidia-cuda")
@@ -36,14 +36,18 @@ if(QMC_OMP)
3636
set(OFFLOAD_ARCH gfx906)
3737
endif()
3838

39-
if(NOT DEFINED OFFLOAD_ARCH AND OFFLOAD_TARGET MATCHES "nvptx64" AND DEFINED CMAKE_CUDA_ARCHITECTURES)
39+
if(NOT DEFINED OFFLOAD_ARCH
40+
AND OFFLOAD_TARGET MATCHES "nvptx64"
41+
AND DEFINED CMAKE_CUDA_ARCHITECTURES)
4042
list(LENGTH CMAKE_CUDA_ARCHITECTURES NUMBER_CUDA_ARCHITECTURES)
4143
if(NUMBER_CUDA_ARCHITECTURES EQUAL "1")
4244
set(OFFLOAD_ARCH sm_${CMAKE_CUDA_ARCHITECTURES})
4345
else()
44-
message(FATAL_ERROR "LLVM does not yet support offload to multiple architectures! "
45-
"Deriving OFFLOAD_ARCH from CMAKE_CUDA_ARCHITECTURES failed. "
46-
"Please keep only one entry in CMAKE_CUDA_ARCHITECTURES or set OFFLOAD_ARCH.")
46+
message(
47+
FATAL_ERROR
48+
"LLVM does not yet support offload to multiple architectures! "
49+
"Deriving OFFLOAD_ARCH from CMAKE_CUDA_ARCHITECTURES failed. "
50+
"Please keep only one entry in CMAKE_CUDA_ARCHITECTURES or set OFFLOAD_ARCH.")
4751
endif()
4852
endif()
4953

@@ -69,7 +73,8 @@ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror=vla")
6973
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wvla")
7074

7175
# set compiler warnings
72-
string(APPEND CMAKE_CXX_FLAGS " -Wall -Wno-unused-variable -Wno-overloaded-virtual -Wno-unused-private-field -Wno-unused-local-typedef")
76+
string(APPEND CMAKE_CXX_FLAGS
77+
" -Wall -Wno-unused-variable -Wno-overloaded-virtual -Wno-unused-private-field -Wno-unused-local-typedef")
7378

7479
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 11.0)
7580
string(APPEND CMAKE_CXX_FLAGS " -Wsuggest-override")
@@ -142,7 +147,7 @@ endif(QMC_BUILD_STATIC)
142147

143148
# Coverage
144149
if(ENABLE_GCOV)
145-
set(GCOV_COVERAGE TRUE)
150+
set(GCOV_SUPPORTED TRUE)
146151
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage")
147152
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --coverage")
148153
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --coverage")

CMake/FindRMG.cmake

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
# Locate rmg-cpu
1+
# Locate rmg-cpu
22
# Take RMG_BIN as hint for location
33

44
find_program(RMG_CPU_EXE rmg-cpu HINTS ${RMG_BIN})
55

66
set(RMG_FOUND FALSE)
77
if(RMG_CPU_EXE)
8-
MESSAGE(STATUS "RMG_CPU_EXE=${RMG_CPU_EXE}")
8+
message(STATUS "RMG_CPU_EXE=${RMG_CPU_EXE}")
99
set(RMG_FOUND TRUE)
1010
endif()
1111

CMake/GNUCompilers.cmake

+14-8
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ if(QMC_OMP)
1414
message(WARNING "GCC OpenMP offload feature requires 12.0 or higher.")
1515
endif()
1616

17-
if (QMC_CUDA2HIP)
17+
if(QMC_CUDA2HIP)
1818
set(OFFLOAD_TARGET_DEFAULT "amdgcn-amdhsa")
1919
else()
2020
set(OFFLOAD_TARGET_DEFAULT "nvptx-none")
@@ -28,14 +28,18 @@ if(QMC_OMP)
2828
set(OFFLOAD_ARCH gfx906)
2929
endif()
3030

31-
if(NOT DEFINED OFFLOAD_ARCH AND OFFLOAD_TARGET MATCHES "nvptx-none" AND DEFINED CMAKE_CUDA_ARCHITECTURES)
31+
if(NOT DEFINED OFFLOAD_ARCH
32+
AND OFFLOAD_TARGET MATCHES "nvptx-none"
33+
AND DEFINED CMAKE_CUDA_ARCHITECTURES)
3234
list(LENGTH CMAKE_CUDA_ARCHITECTURES NUMBER_CUDA_ARCHITECTURES)
3335
if(NUMBER_CUDA_ARCHITECTURES EQUAL "1")
3436
set(OFFLOAD_ARCH sm_${CMAKE_CUDA_ARCHITECTURES})
3537
else()
36-
message(FATAL_ERROR "GCC does not yet support offload to multiple architectures! "
37-
"Deriving OFFLOAD_ARCH from CMAKE_CUDA_ARCHITECTURES failed. "
38-
"Please keep only one entry in CMAKE_CUDA_ARCHITECTURES or set OFFLOAD_ARCH.")
38+
message(
39+
FATAL_ERROR
40+
"GCC does not yet support offload to multiple architectures! "
41+
"Deriving OFFLOAD_ARCH from CMAKE_CUDA_ARCHITECTURES failed. "
42+
"Please keep only one entry in CMAKE_CUDA_ARCHITECTURES or set OFFLOAD_ARCH.")
3943
endif()
4044
endif()
4145

@@ -47,7 +51,10 @@ if(QMC_OMP)
4751
set(OPENMP_OFFLOAD_COMPILE_OPTIONS
4852
"${OPENMP_OFFLOAD_COMPILE_OPTIONS} -foffload-options=${OFFLOAD_TARGET}=\"-misa=${OFFLOAD_ARCH}\"")
4953
else()
50-
message(WARNING "We don't know how to handle OFFLOAD_ARCH=${OFFLOAD_ARCH} for OFFLOAD_TARGET=${OFFLOAD_TARGET}. Got ignored.")
54+
message(
55+
WARNING
56+
"We don't know how to handle OFFLOAD_ARCH=${OFFLOAD_ARCH} for OFFLOAD_TARGET=${OFFLOAD_TARGET}. Got ignored."
57+
)
5158
endif()
5259
endif()
5360
else()
@@ -59,8 +66,7 @@ endif(QMC_OMP)
5966
add_definitions(-Drestrict=__restrict__)
6067

6168
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -finline-limit=1000 -fstrict-aliasing -funroll-all-loops")
62-
set(CMAKE_CXX_FLAGS
63-
"${CMAKE_CXX_FLAGS} -finline-limit=1000 -fstrict-aliasing -funroll-all-loops")
69+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -finline-limit=1000 -fstrict-aliasing -funroll-all-loops")
6470

6571
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -fno-omit-frame-pointer")
6672
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-omit-frame-pointer")

CMake/macros.cmake

+2-1
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,8 @@ function(
122122

123123
if(NOT QMC_OMP)
124124
if(${THREADS} GREATER 1)
125-
message(VERBOSE "Disabling test ${TESTNAME} (exceeds maximum number of threads=1 if OpenMP is disabled -DQMC_OMP=0)")
125+
message(VERBOSE
126+
"Disabling test ${TESTNAME} (exceeds maximum number of threads=1 if OpenMP is disabled -DQMC_OMP=0)")
126127
return()
127128
endif()
128129
endif()

CMake/run_rmg.cmake

+27-44
Original file line numberDiff line numberDiff line change
@@ -10,23 +10,23 @@ if(QMC_NO_SLOW_CUSTOM_TESTING_COMMANDS)
1010
else(QMC_NO_SLOW_CUSTOM_TESTING_COMMANDS)
1111

1212
function(
13-
ADD_RMG_TEST
14-
TESTNAME
15-
NPROCS
16-
NTHREADS
17-
TEST_BINARY
18-
WORKDIR
19-
TEST_INPUT)
13+
ADD_RMG_TEST
14+
TESTNAME
15+
NPROCS
16+
NTHREADS
17+
TEST_BINARY
18+
WORKDIR
19+
TEST_INPUT)
2020
#if(HAVE_MPI)
2121
# add_test(NAME ${TESTNAME} COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${NPROCS} ${MPIEXEC_PREFLAGS}
2222
# ${TEST_BINARY} ${TEST_INPUT})
2323
#else(HAVE_MPI)
24-
add_test(NAME ${TESTNAME} COMMAND ${TEST_BINARY} ${TEST_INPUT})
25-
#endif(HAVE_MPI)
24+
add_test(NAME ${TESTNAME} COMMAND ${TEST_BINARY} ${TEST_INPUT})
25+
#endif(HAVE_MPI)
2626
set_tests_properties(
2727
${TESTNAME}
2828
PROPERTIES ENVIRONMENT
29-
"OMP_NUM_THREADS=${NTHREADS};RMG_NUM_THREADS=${NTHREADS}"
29+
"OMP_NUM_THREADS=${NTHREADS};RMG_NUM_THREADS=${NTHREADS}"
3030
PROCESSORS
3131
${NPROCS}
3232
PROCESSOR_AFFINITY
@@ -39,31 +39,13 @@ else(QMC_NO_SLOW_CUSTOM_TESTING_COMMANDS)
3939
PROPERTY LABELS "converter;rmg")
4040
endfunction()
4141

42-
function(
43-
ADD_RMG_CONVERT_TEST
44-
TESTNAME
45-
PREFIX
46-
WORKDIR
47-
TEST_INPUT)
42+
function(ADD_RMG_CONVERT_TEST TESTNAME PREFIX WORKDIR TEST_INPUT)
4843
add_test(NAME ${TESTNAME} COMMAND $<TARGET_FILE:convert4qmc> -rmg ${TEST_INPUT} -prefix ${PREFIX})
49-
set_tests_properties(
50-
${TESTNAME}
51-
PROPERTIES
52-
WORKING_DIRECTORY
53-
${WORKDIR})
54-
set_property(
55-
TEST ${TESTNAME}
56-
APPEND
57-
PROPERTY LABELS "converter;rmg")
44+
set_tests_properties(${TESTNAME} PROPERTIES WORKING_DIRECTORY ${WORKDIR})
45+
set_property(TEST ${TESTNAME} APPEND PROPERTY LABELS "converter;rmg")
5846
endfunction()
5947

60-
function(
61-
RUN_RMG_TEST
62-
BASE_NAME
63-
SRC_DIR
64-
NPROCS
65-
NTHREADS
66-
TEST_NAME)
48+
function(RUN_RMG_TEST BASE_NAME SRC_DIR NPROCS NTHREADS TEST_NAME)
6749
set(FULL_NAME ${BASE_NAME}-np-${NPROCS})
6850
set(${TEST_NAME}
6951
${FULL_NAME}
@@ -85,26 +67,27 @@ function(SOFTLINK_H5_RMG_WAVES SOURCE PREFIX)
8567
# set(${TEST_NAME}
8668
# "LINK_${SOURCE}_h5_Waves"
8769
# PARENT_SCOPE)
88-
add_test(NAME LINK_${SOURCE}_h5_Waves COMMAND ${qmcpack_SOURCE_DIR}/tests/scripts/clean_and_link_h5.sh
89-
${SOURCE}/Waves/wave.out.h5 ${SOURCE}/${PREFIX}.h5)
70+
add_test(NAME LINK_${SOURCE}_h5_Waves COMMAND ${qmcpack_SOURCE_DIR}/tests/scripts/clean_and_link_h5.sh
71+
${SOURCE}/Waves/wave.out.h5 ${SOURCE}/${PREFIX}.h5)
9072
set_tests_properties(LINK_${SOURCE}_h5_Waves PROPERTIES DEPENDS ${SOURCE}-scf)
9173
set_property(TEST LINK_${SOURCE}_h5_Waves APPEND PROPERTY LABELS "rmg")
9274
endfunction()
9375

9476
function(SOFTLINK_RMG_INPUT SOURCE TARGET PREFIX TEST_NAME)
9577
set(${TEST_NAME}
96-
"LINK_${SOURCE}_TO_${TARGET}"
78+
"LINK_${SOURCE}_TO_${TARGET}"
9779
PARENT_SCOPE)
98-
add_test(NAME LINK_${SOURCE}_TO_${TARGET} COMMAND ${qmcpack_SOURCE_DIR}/tests/scripts/clean_and_link_h5.sh
99-
${SOURCE}/${PREFIX}.h5 ${SOURCE}-${TARGET}/${PREFIX}.h5)
100-
set_tests_properties(LINK_${SOURCE}_TO_${TARGET} PROPERTIES DEPENDS ${SOURCE}-scf)
101-
set_property(TEST LINK_${SOURCE}_TO_${TARGET} APPEND PROPERTY LABELS "rmg")
102-
add_test(NAME COPY_${SOURCE}_XML_TO_${TARGET} COMMAND
103-
bash -c "mkdir -p ${SOURCE}-${TARGET}; \
80+
add_test(NAME LINK_${SOURCE}_TO_${TARGET} COMMAND ${qmcpack_SOURCE_DIR}/tests/scripts/clean_and_link_h5.sh
81+
${SOURCE}/${PREFIX}.h5 ${SOURCE}-${TARGET}/${PREFIX}.h5)
82+
set_tests_properties(LINK_${SOURCE}_TO_${TARGET} PROPERTIES DEPENDS ${SOURCE}-scf)
83+
set_property(TEST LINK_${SOURCE}_TO_${TARGET} APPEND PROPERTY LABELS "rmg")
84+
add_test(
85+
NAME COPY_${SOURCE}_XML_TO_${TARGET}
86+
COMMAND
87+
bash -c "mkdir -p ${SOURCE}-${TARGET}; \
10488
cp ${SOURCE}/${PREFIX}.structure.xml ${SOURCE}-${TARGET}/${PREFIX}.structure.xml ; \
10589
cp ${SOURCE}/${PREFIX}.wfnoj.xml ${SOURCE}-${TARGET}/${PREFIX}.wfnoj.xml ; \
10690
cp ${SOURCE}/*.qmcpp.xml ${SOURCE}-${TARGET}/")
107-
set_tests_properties(COPY_${SOURCE}_XML_TO_${TARGET} PROPERTIES DEPENDS ${SOURCE}-scf)
108-
set_property(TEST COPY_${SOURCE}_XML_TO_${TARGET} APPEND PROPERTY LABELS "rmg")
91+
set_tests_properties(COPY_${SOURCE}_XML_TO_${TARGET} PROPERTIES DEPENDS ${SOURCE}-scf)
92+
set_property(TEST COPY_${SOURCE}_XML_TO_${TARGET} APPEND PROPERTY LABELS "rmg")
10993
endfunction()
110-

CMake/test_labels.cmake

+6-4
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,19 @@
11
function(ADD_TEST_LABELS TEST_NAME TEST_LABELS)
22
set(TEST_LABELS_TEMP "")
33
set(TEST_LABELS_UNIQUE_NAME TEST_LABELS_${TEST_NAME}_${QMC_CUDA}_${QMC_COMPLEX}_${QMC_MIXED_PRECISION})
4-
if (DEFINED ${TEST_LABELS_UNIQUE_NAME})
4+
if(DEFINED ${TEST_LABELS_UNIQUE_NAME})
55
set(TEST_LABELS_TEMP ${${TEST_LABELS_UNIQUE_NAME}})
66
else()
77
set(SUCCESS FALSE)
88
execute_process(
9-
COMMAND ${Python3_EXECUTABLE} ${qmcpack_SOURCE_DIR}/tests/scripts/test_labels.py ${TEST_NAME} ${QMC_CUDA} ${QMC_COMPLEX}
10-
${QMC_MIXED_PRECISION}
9+
COMMAND ${Python3_EXECUTABLE} ${qmcpack_SOURCE_DIR}/tests/scripts/test_labels.py ${TEST_NAME} ${QMC_CUDA}
10+
${QMC_COMPLEX} ${QMC_MIXED_PRECISION}
1111
OUTPUT_VARIABLE TEST_LABELS_TEMP
1212
RESULT_VARIABLE SUCCESS)
1313
if(${SUCCESS} STREQUAL "0")
14-
set(${TEST_LABELS_UNIQUE_NAME} ${TEST_LABELS_TEMP} CACHE INTERNAL "for internal use only; do not modify")
14+
set(${TEST_LABELS_UNIQUE_NAME}
15+
${TEST_LABELS_TEMP}
16+
CACHE INTERNAL "for internal use only; do not modify")
1517
else()
1618
message("Warning: test labeling failed. Test labeling error output:\n${TEST_LABELS_TEMP}")
1719
set(TEST_LABELS_TEMP "")

CMake/unit_test.cmake

+1-4
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,5 @@ function(add_test_target_in_output_location TARGET_NAME_TO_TEST EXE_DIR_RELATIVE
4949
set(TESTNAME build_output_${TARGET_NAME_TO_TEST}_exists)
5050
add_test(NAME ${TESTNAME} COMMAND ls ${qmcpack_BINARY_DIR}/bin/${BASE_NAME})
5151

52-
set_property(
53-
TEST ${TESTNAME}
54-
APPEND
55-
PROPERTY LABELS "unit;deterministic")
52+
set_property(TEST ${TESTNAME} APPEND PROPERTY LABELS "unit;deterministic")
5653
endfunction()

0 commit comments

Comments
 (0)