QMCPACK
diff --git a/‎.github/workflows/ci-github-action-auto-rebase.yaml
+3-3 b/‎.github/workflows/ci-github-action-auto-rebase.yaml
+3-3
diff --git a/‎.github/workflows/ci-github-actions-self-hosted.yaml
+15-15 b/‎.github/workflows/ci-github-actions-self-hosted.yaml
+15-15
diff --git a/‎.github/workflows/ci-github-actions.yaml
+7-2 b/‎.github/workflows/ci-github-actions.yaml
+7-2
diff --git a/‎.gitignore
+1 b/‎.gitignore
+1
diff --git a/‎CHANGELOG.md
+88-1 b/‎CHANGELOG.md
+88-1
diff --git a/‎CMake/ClangCompilers.cmake
+6-6 b/‎CMake/ClangCompilers.cmake
+6-6
diff --git a/‎CMake/IntelCompilers.cmake
+5-7 b/‎CMake/IntelCompilers.cmake
+5-7
diff --git a/‎CMake/IntelDPCPPConfig-modified.cmake
-5 b/‎CMake/IntelDPCPPConfig-modified.cmake
-5
diff --git a/‎CMake/TestCxx17Library.cmake
+4-4 b/‎CMake/TestCxx17Library.cmake
+4-4
diff --git a/‎CMake/Testlibstdc++.cmake
+8 b/‎CMake/Testlibstdc++.cmake
+8
@@ -31,7 +31,7 @@ jobs:
       run: tests/test_automation/github-actions/ci/run_step.sh rebase
       env:
         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        QMCPACK_BOT_GPG_KEY: ${{ QMCPACK_BOT_GPG_KEY }}
+        QMCPACK_BOT_GPG_KEY: ${{ secrets.QMCPACK_BOT_GPG_KEY }}
         QMCPACK_BOT_GPG_SIGNING_KEY: ${{ secrets.QMCPACK_BOT_GPG_SIGNING_KEY }}
 
   trigger-rebase:
@@ -48,5 +48,5 @@ jobs:
       run: tests/test_automation/github-actions/ci/run_step.sh pull-rebase
       env:
         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        QMCPACK_BOT_GPG_KEY: ${{ QMCPACK_BOT_GPG_KEY }}
-        QMCPACK_BOT_GPG_SIGNING_KEY: ${{ secrets.QMCPACK_BOT_GPG_SIGNING_KEY }}
+        QMCPACK_BOT_GPG_KEY: ${{ secrets.QMCPACK_BOT_GPG_KEY }}
+        QMCPACK_BOT_GPG_SIGNING_KEY: ${{ secrets.QMCPACK_BOT_GPG_SIGNING_KEY }}
@@ -42,7 +42,7 @@ jobs:
       - name: GitHub API Request
         if: steps.check.outputs.triggered == 'true'
         id: request
-        uses: octokit/request-action@v2.1.0
+        uses: octokit/request-action@v2.1.7
         with:
           route: ${{github.event.issue.pull_request.url}}
         env:
@@ -52,7 +52,7 @@ jobs:
       # just like any other third-party service
       - name: Create PR status
         if: steps.check.outputs.triggered == 'true'
-        uses: Sibz/github-status-action@v1
+        uses: Sibz/github-status-action@v1.1.6
         with:
           authToken: ${{secrets.GITHUB_TOKEN}}
           context: "ornl-sulfur CI ${{ matrix.jobname }}"
@@ -91,7 +91,7 @@ jobs:
 
       - name: Report PR status
         if: always() && steps.check.outputs.triggered == 'true'
-        uses: Sibz/github-status-action@v1
+        uses: Sibz/github-status-action@v1.1.6
         with:
           authToken: ${{secrets.GITHUB_TOKEN}}
           context: "ornl-sulfur CI ${{matrix.jobname}}"
@@ -123,9 +123,9 @@ jobs:
             Clang15-MPI-CUDA-AFQMC-Offload-Real,
             Clang15-MPI-CUDA-AFQMC-Offload-Complex-Mixed,
             Clang15-MPI-CUDA-AFQMC-Offload-Complex,
-            Intel19-MPI-CUDA-AFQMC-Real-Mixed, # auxiliary field, requires MPI
-            Intel19-MPI-CUDA-AFQMC-Complex-Mixed,
-            Intel19-MPI-CUDA-AFQMC-Real,
+            Intel21-MPI-CUDA-AFQMC-Real-Mixed, # auxiliary field, requires MPI
+            Intel21-MPI-CUDA-AFQMC-Complex-Mixed,
+            Intel21-MPI-CUDA-AFQMC-Real,
           ]
 
     steps:
@@ -144,7 +144,7 @@ jobs:
       - name: GitHub API Request
         if: steps.check.outputs.triggered == 'true'
         id: request
-        uses: octokit/request-action@v2.1.0
+        uses: octokit/request-action@v2.1.7
         with:
           route: ${{github.event.issue.pull_request.url}}
         env:
@@ -154,7 +154,7 @@ jobs:
       # just like any other third-party service
       - name: Create PR status
         if: steps.check.outputs.triggered == 'true'
-        uses: Sibz/github-status-action@v1
+        uses: Sibz/github-status-action@v1.1.6
         with:
           authToken: ${{secrets.GITHUB_TOKEN}}
           context: "ornl-sulfur CI ${{ matrix.jobname }}"
@@ -193,7 +193,7 @@ jobs:
 
       - name: Report PR status
         if: always() && steps.check.outputs.triggered == 'true'
-        uses: Sibz/github-status-action@v1
+        uses: Sibz/github-status-action@v1.1.6
         with:
           authToken: ${{secrets.GITHUB_TOKEN}}
           context: "ornl-sulfur CI ${{matrix.jobname}}"
@@ -244,7 +244,7 @@ jobs:
       - name: GitHub API Request
         if: steps.check.outputs.triggered == 'true'
         id: request
-        uses: octokit/request-action@v2.1.0
+        uses: octokit/request-action@v2.1.7
         with:
           route: ${{github.event.issue.pull_request.url}}
         env:
@@ -254,7 +254,7 @@ jobs:
       # just like any other third-party service
       - name: Create PR status
         if: steps.check.outputs.triggered == 'true'
-        uses: Sibz/github-status-action@v1
+        uses: Sibz/github-status-action@v1.1.6
         with:
           authToken: ${{secrets.GITHUB_TOKEN}}
           context: "ornl-nitrogen CI ${{matrix.jobname}}"
@@ -293,7 +293,7 @@ jobs:
 
       - name: Report PR status
         if: always() && steps.check.outputs.triggered == 'true'
-        uses: Sibz/github-status-action@v1
+        uses: Sibz/github-status-action@v1.1.6
         with:
           authToken: ${{secrets.GITHUB_TOKEN}}
           context: "ornl-nitrogen CI ${{matrix.jobname}}"
@@ -339,7 +339,7 @@ jobs:
       - name: GitHub API Request
         if: steps.check.outputs.triggered == 'true'
         id: request
-        uses: octokit/request-action@v2.1.0
+        uses: octokit/request-action@v2.1.7
         with:
           route: ${{github.event.issue.pull_request.url}}
         env:
@@ -349,7 +349,7 @@ jobs:
       # just like any other third-party service
       - name: Create PR status
         if: steps.check.outputs.triggered == 'true'
-        uses: Sibz/github-status-action@v1
+        uses: Sibz/github-status-action@v1.1.6
         with:
           authToken: ${{secrets.GITHUB_TOKEN}}
           context: "ornl-nitrogen CI ${{matrix.jobname}}"
@@ -388,7 +388,7 @@ jobs:
 
       - name: Report PR status
         if: always() && steps.check.outputs.triggered == 'true'
-        uses: Sibz/github-status-action@v1
+        uses: Sibz/github-status-action@v1.1.6
         with:
           authToken: ${{secrets.GITHUB_TOKEN}}
           context: "ornl-nitrogen CI ${{matrix.jobname}}"
 
@@ -127,15 +127,15 @@ jobs:
 
       - name: Upload Coverage
         if: contains(matrix.jobname, 'Gcov') && github.repository_owner == 'QMCPACK'
-        uses: codecov/codecov-action@v2
+        uses: codecov/codecov-action@v3
         with:
           files: ../qmcpack-build/coverage.xml
           flags: tests-deterministic # optional
           name: codecov-QMCPACK # optional
           fail_ci_if_error: true # optional (default = false)
 
   macos:
-    runs-on: macos-latest
+    runs-on: macos-11
     env:
       GH_JOBNAME: ${{ matrix.jobname }}
       GH_OS: macOS
@@ -149,6 +149,11 @@ jobs:
       - name: Checkout Action
         uses: actions/checkout@v3
 
+      - name: Set Python Version
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+
       - name: Setup Dependencies
         run: |
           brew install ninja hdf5 fftw boost
 
@@ -19,6 +19,7 @@ tests/solids/NiO_a4_e48_pp/NiO-fcc-supertwist111-supershift000-S1.h5
 
 # Visual Studio code settings
 .vscode/
+.cache/
 
 # Doxygen output
 doxygen/output/
 
@@ -2,6 +2,93 @@
 
 Notable changes to QMCPACK are documented in this file.
 
+## [3.16.0] - 2023-01-31
+
+This release contains important bug fixes as well as feature improvements. It is a recommended release for all users. Thanks to
+everyone who reported an issue or suggested an improvement. See GitHub for the full list of merged pull requests and closed issues.
+
+This release is expected to be the last including the legacy CUDA implementation, the version built with QMC_CUDA=1. Users should
+transition to the batched drivers which support greater functionality as well as both CPU and GPU execution. Users should adopt
+these drivers now and report any issues. The new drivers can be requested with the driver_version input parameter, see
+https://qmcpack.readthedocs.io/en/develop/performance_portable.html . In a subsequent release, the non-batched CPU drivers will also
+be removed leaving only the performance portable batched drivers. This will result in a single implementation of most functionality,
+improving overall usability and maintainability.
+
+* Important bugfix to NLPP integration grid rotations and update to all relevant deterministic test values. See issue
+  [\#4362](https://github.com/QMCPACK/qmcpack/issues/4362) for full discussion and visualization. Found and corrected by
+  @markdewing, this bug has existed since the earliest days of QMCPACK. The stochastic rotations used to randomly reorient the
+  integration grids for the non-local pseudopotentials would not cover the full sphere unless they had many points and sufficient
+  symmetry, as was the case for the QMCPACK default. However, calculations with custom integration grids with only a few points
+  (small `nrule`) could show error or excess statistical noise in the non-local part of the pseudopotential energy. Standard
+  calculations and tests on carbon diamond, lithium hydride, and hydrocarbon molecules were not affected due to QMCPACK's
+  conservative defaults. Tests updated in [\#4383](https://github.com/QMCPACK/qmcpack/pull/4383)
+* NLPP grid randomization can be disabled for debugging and greater reproducibility [\#4394](https://github.com/QMCPACK/qmcpack/pull/4394)
+* Two-body Jastrow support for true 2D calculations [\#4289](https://github.com/QMCPACK/qmcpack/pull/4289) (contributed by @Paul-St-Young)
+* Fix for very large calculations requesting too large grids in CUDA spline implementation [\#4421](https://github.com/QMCPACK/qmcpack/pull/4421) (contributed by @pwang234)
+* Bugfix for memory errors in the batched OpenMP offload implementation [\#4408](https://github.com/QMCPACK/qmcpack/pull/4408) when the
+  number of splines is not a perfectly aligned size (multiple of 8 single precision or 4 double precision).
+* Updates to test tolerances for many build types and platforms to improve reliability of deterministic tests. Goal: `ctest -L
+  deterministic` should pass on all platforms. Please report any failures.
+* Improved CMake configuration including detecting use of parallel HDF5 in non-MPI builds
+  [\#4420](https://github.com/QMCPACK/qmcpack/pull/4420) and detection of missing OpenMP support
+  [\#4422](https://github.com/QMCPACK/qmcpack/pull/4422)
+* Optimization of spinor wavefunctions with spin-orbit and pseudopotentials re-enabled
+  [\#4418](https://github.com/QMCPACK/qmcpack/pull/4418)
+* QMCPACK output now indicates status of QMC_COMPLEX [\#4412](https://github.com/QMCPACK/qmcpack/pull/4412)
+* Initial work for eventual GPU offloading of Gaussian basis wavefunctions for molecules and solids
+  [\#4407](https://github.com/QMCPACK/qmcpack/pull/4407)
+* Bugfix to support one-body Jastrow functions where only a subset of elements is given
+  [\#4405](https://github.com/QMCPACK/qmcpack/pull/4405)
+* Electron coordinates are printed in case a NaN is detected [\#4401](https://github.com/QMCPACK/qmcpack/pull/4401)
+* To evade support problems for complex reductions in OpenMP offload compilers, real builds no longer reference any complex
+  reductions [\#4379](https://github.com/QMCPACK/qmcpack/pull/4379)
+* Enabled HIP as language in CMake (requires >= 3.21) [\#3646](https://github.com/QMCPACK/qmcpack/pull/3646). When using HIP
+  targeting AMD GPUs, replace HIP_ARCH with CMAKE_HIP_ARCHITECTURES if HIP_ARCH was used to specify the GPU architectures.
+* Refinements to SYCL usage, e.g., [\#4384](https://github.com/QMCPACK/qmcpack/pull/4384),
+  [\#4382](https://github.com/QMCPACK/qmcpack/pull/4382), [\#4380](https://github.com/QMCPACK/qmcpack/pull/4380) 
+* Many expanded tests including for NLPP parameter derivatives [\#4394](https://github.com/QMCPACK/qmcpack/pull/4394), more boundary
+  conditions in distance tables [\#4374](https://github.com/QMCPACK/qmcpack/pull/4374), for reptation Monte Carlo observables
+  [\#4327](https://github.com/QMCPACK/qmcpack/pull/4327), and orbital rotations
+  [\#4304](https://github.com/QMCPACK/qmcpack/pull/4304)
+* Many updates to HDF5 usage including adoption of HDF5 1.10 API [\#4352](https://github.com/QMCPACK/qmcpack/pull/4352) and
+  related cleanup, e.g. [\#4300](https://github.com/QMCPACK/qmcpack/pull/4300)
+* Initial Perlmutter CPU build recipe [\#4398](https://github.com/QMCPACK/qmcpack/pull/4398)
+* Initial ALCF Sunspot build recipe including offloading to Intel Ponte Vecchio/Xe HPC GPU
+  [\#4391](https://github.com/QMCPACK/qmcpack/pull/4391)
+* Better support for FreeBSD [\#4416](https://github.com/QMCPACK/qmcpack/pull/4416)
+* Minimum supported Intel classic compiler version is 2021.1. [\#4389](https://github.com/QMCPACK/qmcpack/pull/4389)
+* Ongoing improvement to orbital optimization and rotation, e.g. [\#4288](https://github.com/QMCPACK/qmcpack/pull/4288), [\#4402](https://github.com/QMCPACK/qmcpack/pull/4402)
+* Ongoing code cleanup, e.g. [\#4276](https://github.com/QMCPACK/qmcpack/pull/4276),
+  [\#4275](https://github.com/QMCPACK/qmcpack/pull/4275), [\#4273](https://github.com/QMCPACK/qmcpack/pull/4273)
+* Updated bmpi3 MPI "wrapper"
+* Various other small bug fixes and quality of life improvements. See the full list of merged PRs on GitHub for details.
+
+### Known problems
+
+* When offload builds are compiled with CUDA toolkit versions above 11.2 (tested 11.3-11.8) using LLVM15, multideterminant tests and
+  functionality will fail, seemingly due to an issue with the toolkit. This is discussed in
+  https://github.com/llvm/llvm-project/issues/54633 . All other functionality appears to work as expected. As a workaround, the CUDA
+  toolkit 11.2 can be used. The actual NVIDIA drivers can be more recent.
+* CUDA toolkit version 12.0 is not compatible with LLVM OpenMP offload https://github.com/llvm/llvm-project/issues/60296
+
+### NEXUS
+
+* Nexus: Support for use of templates for job submission scripts [\#4344](https://github.com/QMCPACK/qmcpack/pull/4344)
+* Nexus: twist_info.dat files now added to results directory for easier analysis of twist average quantities
+  [\#4302](https://github.com/QMCPACK/qmcpack/pull/4302)
+* Nexus: Initial support for Polaris at ALCF [\#4354](https://github.com/QMCPACK/qmcpack/pull/4354)
+* Nexus: Initial support for Perlmutter at NERSC [\#4356](https://github.com/QMCPACK/qmcpack/pull/4356)
+* Nexus: Support for gpusharing keyword for legacy CUDA [\#4403](https://github.com/QMCPACK/qmcpack/pull/4403)
+* Nexus: Support for handling multiple pickle protocols [\#4385](https://github.com/QMCPACK/qmcpack/pull/4385)
+* Nexus: CPU/GPU flags for batched code [\#4341](https://github.com/QMCPACK/qmcpack/pull/4341)
+* Nexus: Jastrow factors can be read from existing files [\#4339](https://github.com/QMCPACK/qmcpack/pull/4339)
+* Nexus: Fix VASP POSCAR write [\#4331](https://github.com/QMCPACK/qmcpack/pull/4331)
+* Nexus: Better handling of VASP pseudopotentials [\#4330](https://github.com/QMCPACK/qmcpack/pull/4330)
+
+### Known problems
+
+* The new QE7.1 DFT+U input style is not yet supported [\#4100](https://github.com/QMCPACK/qmcpack/issues/4100)
+
 ##  [3.15.0] - 2022-09-29
 
 This is a recommended release for all users. There are many quality of life
@@ -65,7 +152,7 @@ be required, we recommend trying these drivers now and reporting any issues.
 * Minimum CUDA version is 11.0 [\#3957](https://github.com/QMCPACK/qmcpack/pull/3957)
 * Minimum version of GCC is now v9.
 
-### Nexus
+### NEXUS
 
 * Nexus: support to current batched driver style. Example inputs for batched
   runs using trial wavefunctions from QE are included in
 
@@ -49,16 +49,16 @@ if(QMC_OMP)
 
     if(NOT DEFINED OFFLOAD_ARCH
        AND OFFLOAD_TARGET MATCHES "amdgcn")
-      if (DEFINED HIP_ARCH)
-        list(LENGTH HIP_ARCH NUMBER_HIP_ARCHITECTURES)
-        if(NUMBER_HIP_ARCHITECTURES EQUAL "1")
-          set(OFFLOAD_ARCH ${HIP_ARCH})
+      if (DEFINED CMAKE_HIP_ARCHITECTURES)
+        list(LENGTH CMAKE_HIP_ARCHITECTURES NUMBER_CMAKE_HIP_ARCHITECTURESITECTURES)
+        if(NUMBER_CMAKE_HIP_ARCHITECTURESITECTURES EQUAL "1")
+          set(OFFLOAD_ARCH ${CMAKE_HIP_ARCHITECTURES})
         else()
           message(
             FATAL_ERROR
               "LLVM does not yet support offload to multiple architectures! "
-              "Deriving OFFLOAD_ARCH from HIP_ARCH failed. "
-              "Please keep only one entry in HIP_ARCH or set OFFLOAD_ARCH.")
+              "Deriving OFFLOAD_ARCH from CMAKE_HIP_ARCHITECTURES failed. "
+              "Please keep only one entry in CMAKE_HIP_ARCHITECTURES or set OFFLOAD_ARCH.")
         endif()
       else()
         set(OFFLOAD_ARCH gfx906)
 
@@ -7,17 +7,15 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM")
   if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 2021.3)
     message(FATAL_ERROR "Requires Intel oneAPI 2021.3 or higher!")
   endif()
-elseif(INTEL_ONEAPI_COMPILER_FOUND)
-  # in this case, the version string reported based on Clang, not accurate enough. just skip check.
 else()
-  if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 19.1.0)
-    message(FATAL_ERROR "Requires Intel classic compiler 19.1 or higher!")
+  if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 2021.1)
+    message(FATAL_ERROR "Requires Intel classic compiler 2021.1 or higher!")
   endif()
 endif()
 
 # Enable OpenMP
 if(QMC_OMP)
-  if(CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM" OR INTEL_ONEAPI_COMPILER_FOUND)
+  if(CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM")
     if(ENABLE_OFFLOAD)
       set(OFFLOAD_TARGET
           "spir64"
@@ -35,7 +33,7 @@ if(QMC_OMP)
   endif()
 endif(QMC_OMP)
 
-if(CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM" OR INTEL_ONEAPI_COMPILER_FOUND)
+if(CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM")
   # oneAPI compiler options
 
   # Set clang specific flags (which we always want)
@@ -84,7 +82,7 @@ endif()
 if(NOT CMAKE_SYSTEM_NAME STREQUAL "CrayLinuxEnvironment")
 
   # use -x for classic compiler only. this option is not robust with oneAPI compiler as 2021.3 release
-  if(NOT CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM" AND NOT INTEL_ONEAPI_COMPILER_FOUND)
+  if(NOT CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM")
     set(X_OPTION "^-x| -x")
     set(AX_OPTION "^-ax| -ax")
     #check if the user has already specified -x option for cross-compiling.
 
@@ -233,11 +233,6 @@ if( "x${CMAKE_CXX_COMPILER_ID}" STREQUAL "xClang" OR
   set(SYCL_FLAGS "-fsycl")
 endif()
 
-# Based on Compiler ID, add support for DPCPP
-if( "x${CMAKE_CXX_COMPILER_ID}" STREQUAL "xIntelLLVM")
-  list(PREPEND SYCL_FLAGS "--dpcpp")
-endif()
-
 # TODO verify if this is needed
 # Windows: Add Exception handling
 if(WIN32)
 
@@ -38,11 +38,11 @@ if(NOT CXX17_LIBRARY_OKAY)
   message("compiler is ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
   if(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
     message("Compiler detected is g++.\n  Use version 9.0 or newer for complete C++17 standard library support.")
-  elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+  elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM")
     message(
-      "Compiler detected is clang++.\n  If not using libcxx, ensure a GCC toolchain version equal or greater "
-      "than 9.0 gets picked up. Check with 'clang++ -v'. Or use the --gcc-toolchain compiler option "
-      "(added to CMAKE_CXX_FLAGS) to point to a newer GCC installation."
+      "Compiler detected is <Clang> namely clang++ or a vendor variant (icpx, amdclang++, armclang++).\n  If not using libcxx, ensure a GCC toolchain version equal or greater "
+      "than 9.0 gets picked up. Check with '<Clang> -v'. Or use the --gcc-toolchain compiler option "
+      "(added to both CMAKE_C_FLAGS and CMAKE_CXX_FLAGS) to point to a newer GCC installation."
     )
   elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
     message(
 
@@ -1,5 +1,13 @@
 # Test whether the C++ compiler is compatible with the libstdc++ in use
 
+# Test "#include <cstdio>" before version compatibility checks.
+include(CheckIncludeFileCXX)
+check_include_file_cxx(cstdio INCLUDE_CSTDIO_WORKS)
+if(NOT INCLUDE_CSTDIO_WORKS)
+  unset(INCLUDE_CSTDIO_WORKS CACHE)
+  message(FATAL_ERROR "`#include <cstdio>` test failed! Please provide a working C++ compiler.")
+endif()
+
 try_compile(
   LIBSTDCXX_OKAY
   ${CMAKE_BINARY_DIR}