From 6250e16e1d4905587bec38fc6e390df91ed1eb0c Mon Sep 17 00:00:00 2001 From: nehaprakriya Date: Wed, 22 May 2024 16:01:47 -0700 Subject: [PATCH] SpectraFlux --- regression/spectraflux/Makefile | 94 +++ regression/spectraflux/connectivity.cfg | 28 + regression/spectraflux/data/README | 1 + regression/spectraflux/src/host/host.cpp | 617 ++++++++++++++ regression/spectraflux/src/host/host.h | 256 ++++++ regression/spectraflux/src/host_p2p/host.cpp | 793 ++++++++++++++++++ regression/spectraflux/src/host_p2p/host.h | 273 ++++++ regression/spectraflux/src/host_p2p/tqdm.hpp | 574 +++++++++++++ .../spectraflux/src/host_p2p/xcl2/xcl2.cpp | 180 ++++ .../spectraflux/src/host_p2p/xcl2/xcl2.hpp | 118 +++ regression/spectraflux/src/kernel/hac.cpp | 737 ++++++++++++++++ regression/spectraflux/src/kernel/hac.h | 82 ++ regression/spectraflux/src/kernel_tap/hac.cpp | 702 ++++++++++++++++ regression/spectraflux/src/kernel_tap/hac.h | 72 ++ .../spectraflux/src/kernel_tapa_PCIe/hac.h | 72 ++ regression/spectraflux/xrt.ini | 7 + 16 files changed, 4606 insertions(+) create mode 100644 regression/spectraflux/Makefile create mode 100644 regression/spectraflux/connectivity.cfg create mode 100644 regression/spectraflux/data/README create mode 100644 regression/spectraflux/src/host/host.cpp create mode 100644 regression/spectraflux/src/host/host.h create mode 100644 regression/spectraflux/src/host_p2p/host.cpp create mode 100644 regression/spectraflux/src/host_p2p/host.h create mode 100644 regression/spectraflux/src/host_p2p/tqdm.hpp create mode 100644 regression/spectraflux/src/host_p2p/xcl2/xcl2.cpp create mode 100644 regression/spectraflux/src/host_p2p/xcl2/xcl2.hpp create mode 100644 regression/spectraflux/src/kernel/hac.cpp create mode 100644 regression/spectraflux/src/kernel/hac.h create mode 100644 regression/spectraflux/src/kernel_tap/hac.cpp create mode 100644 regression/spectraflux/src/kernel_tap/hac.h create mode 100644 regression/spectraflux/src/kernel_tapa_PCIe/hac.h create mode 
100644 regression/spectraflux/xrt.ini diff --git a/regression/spectraflux/Makefile b/regression/spectraflux/Makefile new file mode 100644 index 0000000..9ec6a15 --- /dev/null +++ b/regression/spectraflux/Makefile @@ -0,0 +1,94 @@ +VPP := $(XILINX_VITIS)/bin/v++ +TAPAC := $(HOME)/.local/bin/tapac +EMCONFIGUTIL := $(XILINX_VITIS)/bin/emconfigutil +MODE := hw +PLATFORM := xilinx_u55c_gen3x16_xdma_3_202210_1 + +# sources +KERNEL_SRC := src/kernel/hac.cpp +KERNEL_HEADER := src/kernel/hac.h +HOST_SRC := src/host/host.cpp +HOST_P2P_SRC := src/host_p2p/host.cpp src/host_p2p/xcl2/*.cpp +TAPA_KERNEL_SRC := src/kernel_tap/hac.cpp +TAPA_KERNEL_HEADER := src/kernel_tap/hac.h + +# targets +HOST_EXE := host.exe +HOST_P2P_EXE := host_p2p.exe +WORK_DIR := run3 + +XOS_ENCODING := encoding_kernel.$(MODE).xo +XOS_CLUSTERING := clustering_kernel.$(MODE).xo +XOS_TAPA := $(WORK_DIR)/wrapper.$(MODE).xo +XOS_WRAPPER := wrapper.$(MODE).xo + +XCLBIN_ENCODING := hac_encoding.$(MODE).xclbin +XCLBIN := hac.$(MODE).xclbin +EMCONFIG_FILE := emconfig_new.json + +VPP_LINK_OPTS := --profile.data all:all:all --profile.memory all --profile.stall all:all --profile.exec all:all --profile_kernel data:all:all:all --config connectivity.cfg +VPP_COMMON_OPTS := -s -t $(MODE) --platform $(PLATFORM) --report_level 2 --kernel_frequency 300 --optimize 3 +TAPA_OPTS := --platform $(PLATFORM) --connectivity connectivity.cfg --work-dir ${WORK_DIR} --floorplan-output "${WORK_DIR}/wrapper.tcl" + + +CFLAGS := -g -O3 -std=c++17 -I$(XILINX_XRT)/include -I$(XILINX_HLS)/include +LFLAGS := -L$(XILINX_XRT)/lib -lxilinxopencl -lpthread -lrt +NUMDEVICES := 1 + +# run time args +EXE_OPT := hac.$(MODE).xclbin + +# primary build targets +.PHONY: xclbin app all + +# xclbin: $(XCLBIN) +# xclbin_enc: $(XCLBIN_ENCODING) + +app: $(HOST_EXE) $(HOST_P2P_EXE) + +# xo: $(XOS_ENCODING) $(XOS_CLUSTERING) +# xo_enc: $(XOS_ENCODING) +# xo_clu: $(XOS_CLUSTERING) +xo_tapa: $(XOS_TAPA) +# all: xclbin app +xo_wrp: $(XOS_WRAPPER) + +# clean: +# 
-$(RM) $(HOST_EXE) $(EXE_OPT) $(XOS_ENCODING) $(XOS_CLUSTERING) $(EMCONFIG_FILE) + +# kernel rules for encoding +$(XOS_ENCODING): $(KERNEL_SRC) $(KERNEL_HEADER) + $(RM) $@ + $(VPP) $(VPP_COMMON_OPTS) --hls.clock 200000000:encoding_kernel -c -k encoding_kernel -o $@ $+ + +# kernel rules for clustering +$(XOS_CLUSTERING): $(KERNEL_SRC) $(KERNEL_HEADER) + $(RM) $@ + $(VPP) $(VPP_COMMON_OPTS) --hls.clock 150000000:clustering_kernel -c -k clustering_kernel -o $@ $+ +$(XOS_WRAPPER): $(KERNEL_SRC) $(KERNEL_HEADER) + $(RM) $@ + $(VPP) $(VPP_COMMON_OPTS) --hls.clock 300000000:top_wrapper -c -k top_wrapper -o $@ $+ + +$(XOS_TAPA): $(TAPA_KERNEL_SRC) + mkdir -p $(WORK_DIR) + $(TAPAC) $(TAPA_OPTS) --top wrapper -o $@ $+ + + +$(XCLBIN): $(XOS_WRAPPER)# $(XOS_ENCODING) $(XOS_CLUSTERING) + $(VPP) $(VPP_COMMON_OPTS) -l -o $@ $^ $(VPP_LINK_OPTS) + +# host rules +$(HOST_EXE): $(HOST_SRC) + g++ $(CFLAGS) -o $@ $+ $(LFLAGS) + @echo 'Compiled Host Executable: $(HOST_EXE)' + +$(HOST_P2P_EXE): $(HOST_P2P_SRC) + g++ $(CFLAGS) -o $@ $+ $(LFLAGS) + @echo 'Compiled Host Executable: $(HOST_P2P_EXE)' + +$(EMCONFIG_FILE): + $(EMCONFIGUTIL) --nd $(NUMDEVICES) --od . 
--platform $(PLATFORM) + +check: $(XCLBIN) $(HOST_EXE) $(EMCONFIG_FILE) + XCL_EMULATION_MODE=${MODE} ./$(HOST_EXE) $(EXE_OPT) + diff --git a/regression/spectraflux/connectivity.cfg b/regression/spectraflux/connectivity.cfg new file mode 100644 index 0000000..a378be1 --- /dev/null +++ b/regression/spectraflux/connectivity.cfg @@ -0,0 +1,28 @@ +[connectivity] +nk=wrapper:1:wrapper_1 + +sp=wrapper_1.ID_Level_buffer:HBM[12] +sp=wrapper_1.peak_mz_buffer:HBM[13] +sp=wrapper_1.peak_intensity_buffer:HBM[14] +sp=wrapper_1.peak_count_buffer:HBM[12] +sp=wrapper_1.num_spectra_buffer:HBM[15] + +sp=wrapper_1.c0_num_valid_clusters:HBM[0:1] +sp=wrapper_1.c0_num_elements:HBM[0:1] +sp=wrapper_1.c0_consensus:HBM[0:1] +sp=wrapper_1.c0_elements:HBM[2:3] + +sp=wrapper_1.c1_num_valid_clusters:HBM[16:17] +sp=wrapper_1.c1_num_elements:HBM[16:17] +sp=wrapper_1.c1_consensus:HBM[16:17] +sp=wrapper_1.c1_elements:HBM[18:19] + +sp=wrapper_1.c2_num_valid_clusters:HBM[24:25] +sp=wrapper_1.c2_num_elements:HBM[24:25] +sp=wrapper_1.c2_consensus:HBM[24:25] +sp=wrapper_1.c2_elements:HBM[26:27] + +sp=wrapper_1.c3_num_valid_clusters:HBM[28:29] +sp=wrapper_1.c3_num_elements:HBM[28:29] +sp=wrapper_1.c3_consensus:HBM[28:29] +sp=wrapper_1.c3_elements:HBM[30:31] \ No newline at end of file diff --git a/regression/spectraflux/data/README b/regression/spectraflux/data/README new file mode 100644 index 0000000..f1b454a --- /dev/null +++ b/regression/spectraflux/data/README @@ -0,0 +1 @@ +Download data from: https://drive.google.com/drive/folders/1qyiS229CryJoqDkScHsO0wNFaPBwOVT-?usp=sharing \ No newline at end of file diff --git a/regression/spectraflux/src/host/host.cpp b/regression/spectraflux/src/host/host.cpp new file mode 100644 index 0000000..54cebec --- /dev/null +++ b/regression/spectraflux/src/host/host.cpp @@ -0,0 +1,617 @@ + + +#include "host.h" + +int main(int argc, char **argv) +{ + auto start_time = std::chrono::high_resolution_clock::now(); + + std::cout <<"HV Width: " << sizeof(bitset_dhv) << 
"Byte" << std::endl; + + // OpenCL-related var + cl_program program; + cl_kernel kernels[NUM_KERNELS]; + cl_kernel kernels_encoding[NUM_KERNELS_ENCODING]; + cl_int err; + cl_command_queue q[NUM_KERNELS]; + cl_device_id device; + cl_platform_id platform; + cl_int ret; + cl_uint num_platforms; + + // Kernel's buffers on host + // std::vector> valid_clusters(NUM_KERNELS, std::vector(MAX_NUM_SPECTRA)); + std::vector> elements(NUM_KERNELS, std::vector(MAX_NUM_SPECTRA * clu_MAX_BATCH_SIZE, 0)); + std::vector> num_elements(NUM_KERNELS, std::vector(MAX_NUM_SPECTRA * clu_MAX_BATCH_SIZE, 0)); + std::vector> consensus(NUM_KERNELS, std::vector(MAX_NUM_SPECTRA * clu_MAX_BATCH_SIZE, 0)); + std::vector> num_valid_clusters(NUM_KERNELS, std::vector(clu_MAX_BATCH_SIZE, 0)); + // std::vector> num_consensus(NUM_KERNELS, std::vector(MAX_NUM_SPECTRA, 0)); + + std::vector num_spectra_arr(NUM_BATCH); + + std::vector> encoded_spectra_hbm(NUM_BATCH); + std::vector> encoded_spectra(NUM_BATCH); + + + // Read the bucket_size file + std::multimap bucket_sizes = load_bucket_sizes("./data/small1511_bucket.csv"); + std::vector> bucket_sizes_vec(bucket_sizes.begin(), bucket_sizes.end()); + + + for (int ii = 0; ii < NUM_BATCH; ii++) { + num_spectra_arr[ii] = bucket_sizes_vec[ii].second; + // num_valid_clusters[ii] = bucket_sizes_vec[ii].second; + } + + + // Read the CSV file + std::vector>> spectra; + auto start_timex = std::chrono::high_resolution_clock::now(); + read_processed_csv_files("./data/small1511_mz.csv", "./data/small1511_intensity.csv", spectra); + auto end_time_csv = std::chrono::high_resolution_clock::now(); + auto time_csv = std::chrono::duration_cast(end_time_csv - start_timex).count(); + std::cout << "Time to read CSV files: " << time_csv << " milliseconds" << std::endl; + + + // Parse the spectra file + std::vector> peak_mz_buffer(NUM_BATCH/mMAX_BATCH_SIZE+1, std::vector(mMAX_BATCH_SIZE * MAX_NUM_SPECTRA * MAX_PEAKS, 0)); + std::vector> 
peak_intensity_buffer(NUM_BATCH/mMAX_BATCH_SIZE+1, std::vector(mMAX_BATCH_SIZE * MAX_NUM_SPECTRA * MAX_PEAKS, 0)); + std::vector> peak_count_buffer(NUM_BATCH/mMAX_BATCH_SIZE+1, std::vector(mMAX_BATCH_SIZE * MAX_NUM_SPECTRA, 0)); + + auto start_timey = std::chrono::high_resolution_clock::now(); + int start_spectra = 0; + for (int i = 0; i < NUM_BATCH; i++) { + int idx = i / mMAX_BATCH_SIZE; + int offset = i % mMAX_BATCH_SIZE; + read_input_data(spectra, peak_mz_buffer[idx].data() + offset * MAX_NUM_SPECTRA * MAX_PEAKS, peak_intensity_buffer[idx].data() + offset * MAX_NUM_SPECTRA * MAX_PEAKS, + peak_count_buffer[idx].data() + offset * MAX_NUM_SPECTRA, start_spectra, num_spectra_arr[i]); + + std::cout << start_spectra << std::endl; + } + auto end_time_loop = std::chrono::high_resolution_clock::now(); + auto time_loop = std::chrono::duration_cast(end_time_loop - start_timey).count(); + std::cout << "Time for loop: " << time_loop << " milliseconds" << std::endl; + std::cout << "spectra size" << spectra.size() << std::endl; + + + // Load the HD model + std::vector id_hypervectors; + std::vector level_hypervectors; + bitset_dhv id[f]; + bitset_dhv level[Q]; + bitset_dhv id_level[f+Q]; + + read_hypervectors("./data/ID_Hypervectors.txt", id_hypervectors); + read_hypervectors("./data/Level_Hypervectors.txt", level_hypervectors); + + + + for (int i = 0; i < id_hypervectors.size(); i++) { + // id[i] = id_hypervectors[i]; + id_level[i] = id_hypervectors[i]; + } + for (int i = 0; i < level_hypervectors.size(); i++) { + // level[i] = level_hypervectors[i]; + id_level[i+f] = level_hypervectors[i]; + } + assert(id_hypervectors.size()==f); + assert(level_hypervectors.size()==Q); + + + + if (argc < 2) + { + std::cout << "Usage: " << argv[0] << " \n"; + return 1; + } + std::string binaryFile = argv[1]; + + + //********************************************* Create the kernel ********************************************* + + err = clGetPlatformIDs(0, NULL, &num_platforms); + if (err != 
CL_SUCCESS) + { + std::cerr << "Error: clGetPlatformIDs() failed with error code " << err << std::endl; + return EXIT_FAILURE; + } + + std::vector platforms(num_platforms); + err = clGetPlatformIDs(num_platforms, platforms.data(), NULL); + if (err != CL_SUCCESS) + { + std::cerr << "Error: clGetPlatformIDs() failed with error code " << err << std::endl; + return EXIT_FAILURE; + } + + bool platform_found = false; + for (cl_uint i = 0; i < num_platforms; i++) + { + char platform_name[128]; + err = clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(platform_name), platform_name, NULL); + if (err != CL_SUCCESS) + { + std::cerr << "Error: clGetPlatformInfo() failed with error code " << err << std::endl; + return EXIT_FAILURE; + } + + if (std::string(platform_name).find("Xilinx") != std::string::npos) + { + platform = platforms[i]; + platform_found = true; + break; + } + } + + if (!platform_found) + { + std::cerr << "Error: Xilinx platform not found" << std::endl; + return EXIT_FAILURE; + } + + clGetDeviceIDs(platform, CL_DEVICE_TYPE_ACCELERATOR, 1, &device, NULL); + + + cl_context context; + context = clCreateContext(0, 1, &device, NULL, NULL, &err); + for(int i=0; i(XCL_MEM_TOPOLOGY | (i)), &id[0], 0}; + // // ext_buffer_level[i] = {static_cast(XCL_MEM_TOPOLOGY | (i)), &level[0], 0}; + // ext_buffer_id_level[i] = {static_cast(XCL_MEM_TOPOLOGY | (i)), &id_level[0], 0}; + // // ext_buffer_peak_mz[i] = {static_cast(XCL_MEM_TOPOLOGY | (i)), peak_mz_buffer[i].data(), 0}; + // ext_buffer_peak_mz[i] = {static_cast(XCL_MEM_TOPOLOGY | (i)), &peak_mz_buffer[i][0], 0}; + // ext_buffer_peak_intensity[i] = {static_cast(XCL_MEM_TOPOLOGY | (i)), peak_intensity_buffer[i].data(), 0}; + // ext_buffer_peak_count[i] = {static_cast(XCL_MEM_TOPOLOGY | (i)), peak_count_buffer[i].data(), 0}; + + // } + + + // for (int i = 0; i < NUM_KERNELS; i++) { + // ext_buffer_encoded_spectra[i] = {static_cast(XCL_MEM_TOPOLOGY | 4*(i)+1), &encoded_spectra[i][0], 0}; + // ext_buffer_consensus[i] = 
{static_cast(XCL_MEM_TOPOLOGY | 4*(i)+1), &consensus[i][0], 0}; + // ext_buffer_num_consensus[i] = {static_cast(XCL_MEM_TOPOLOGY | 4*(i)+1), &num_consensus[i][0], 0}; + // ext_buffer_valid_clusters[i] = {static_cast(XCL_MEM_TOPOLOGY | 4*(i)+1), &valid_clusters[i][0], 0}; + // ext_buffer_elements[i] = {static_cast(XCL_MEM_TOPOLOGY | 4*(i)+1), &elements[i][0], 0}; + // ext_buffer_num_elements[i] = {static_cast(XCL_MEM_TOPOLOGY | 4*(i)+1), &num_elements[i][0], 0}; + // ext_buffer_num_valid_clusters[i] = {static_cast(XCL_MEM_TOPOLOGY | 4*(i)+1), &num_valid_clusters[i], 0}; + // } + + + + + for (int i = 0; i < NUM_KERNELS_ENCODING; i++) { + // buffer_id[i] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_EXT_PTR_XILINX, sizeof(bitset_dhv) * f, &ext_buffer_id[i], &err); + // buffer_level[i] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_EXT_PTR_XILINX, sizeof(bitset_dhv) * Q, &ext_buffer_level[i], &err); + buffer_id_level[i] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(bitset_dhv) * (f+Q), &id_level[0], &err); + + buffer_peak_mz[i] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * mMAX_BATCH_SIZE * MAX_NUM_SPECTRA * MAX_PEAKS, peak_mz_buffer[i].data(), &err); + + buffer_peak_intensity[i] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * mMAX_BATCH_SIZE * MAX_NUM_SPECTRA * MAX_PEAKS, peak_intensity_buffer[i].data(), &err); + + buffer_peak_count[i] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * mMAX_BATCH_SIZE * MAX_NUM_SPECTRA, peak_count_buffer[i].data(), &err); + // buffer_encoded_spectra_hbm[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(bitset_dhv)*MAX_NUM_SPECTRA*NUM_BATCH, &encoded_spectra_hbm[i][0], &err); + buffer_num_spectra[i] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * mMAX_BATCH_SIZE, num_spectra_arr.data(), &err); + + if (err != 
CL_SUCCESS) { + std::cerr << "Failed to create buffers for kernel NUM_KERNELS_ENCODING" << i << ". Error code: " << err << std::endl; + return EXIT_FAILURE; + } + } + + std::cout<<"1111"< Host + for (int j = 0; j < NUM_KERNELS; j++) { + // err = clEnqueueReadBuffer(q[j], buffer_valid_clusters[j], CL_TRUE, 0, sizeof(int) * MAX_NUM_SPECTRA, valid_clusters[j].data(), 0, NULL, NULL); + err |= clEnqueueReadBuffer(q[j], buffer_num_valid_clusters[j], CL_TRUE, 0, sizeof(int), &num_valid_clusters[j], 0, NULL, NULL); + err |= clEnqueueReadBuffer(q[j], buffer_elements[j], CL_TRUE, 0, sizeof(int) * MAX_NUM_SPECTRA * CLUSTER_SIZE, elements[j].data(), 0, NULL, NULL); + err |= clEnqueueReadBuffer(q[j], buffer_num_elements[j], CL_TRUE, 0, sizeof(int) * MAX_NUM_SPECTRA, num_elements[j].data(), 0, NULL, NULL); + err |= clEnqueueReadBuffer(q[j], buffer_consensus[j], CL_TRUE, 0, sizeof(int) * MAX_NUM_SPECTRA, consensus[j].data(), 0, NULL, NULL); + // err |= clEnqueueReadBuffer(q[j], buffer_num_consensus[j], CL_TRUE, 0, sizeof(int) * MAX_NUM_SPECTRA, num_consensus[j].data(), 0, NULL, NULL); + if (err != CL_SUCCESS) { + std::cerr << "Failed to read from buffers for kernel " << j << "." 
<< std::endl; + return EXIT_FAILURE; + } + } + for (int j = 0; j < NUM_KERNELS; j++) { + clFinish(q[j]); + } + + + + // Write output file + // for (int i = 0; i < NUM_KERNELS; i++) { + // std::stringstream filename; + // std::stringstream filename2; + // filename << "Final_norm_new_combined_test.txt"; + // filename2 << "Final_serial_combined_test.txt"; + // print_clusters(elements[i].data(), num_elements[i].data(), consensus[i].data(), valid_clusters[i].data(), num_valid_clusters[i], filename2.str().c_str(), current_cluster_id, current_index_id); + + // print_clusters_temp(elements[i].data(), num_elements[i].data(), consensus[i].data(), valid_clusters[i].data(), num_valid_clusters[i], filename.str().c_str()); + + // current_cluster_id += num_valid_clusters[i]; + // current_index_id += num_spectra_arr[(ii*NUM_KERNELS+i)]; + + // } + } + + + + } + + +//********************************************* Run clustering ********************************************* + + /*for (int ii = 0; ii < NUM_BATCH/NUM_KERNELS; ii++) { + cl_event kernel_events[NUM_KERNELS]; + + for (int j = 0; j < NUM_KERNELS; j++) { + + err |= clSetKernelArg(kernels[j], 6, sizeof(cl_mem), &buffer_encoded_spectra[j]); + err |= clSetKernelArg(kernels[j], 7, sizeof(int), &num_spectra_arr[(ii*NUM_KERNELS+j)]); + err |= clSetKernelArg(kernels[j], 8, sizeof(cl_mem), &buffer_valid_clusters[j]); + err |= clSetKernelArg(kernels[j], 9, sizeof(cl_mem), &buffer_num_valid_clusters[j]); + err |= clSetKernelArg(kernels[j], 10, sizeof(cl_mem), &buffer_elements[j]); + err |= clSetKernelArg(kernels[j], 11, sizeof(cl_mem), &buffer_num_elements[j]); + err |= clSetKernelArg(kernels[j], 12, sizeof(cl_mem), &buffer_consensus[j]); + err |= clSetKernelArg(kernels[j], 13, sizeof(cl_mem), &buffer_num_consensus[j]); + int batch_size = 0; + err |= clSetKernelArg(kernels_encoding[j], 5, sizeof(int), &batch_size); + + + if (err != CL_SUCCESS) { + std::cerr << "Failed to set kernel arguments for kernel " << j << std::endl; + return 
EXIT_FAILURE; + } + + std::cout << ii * NUM_KERNELS + j < Host + for (int j = 0; j < NUM_KERNELS; j++) { + err = clEnqueueReadBuffer(q[j], buffer_valid_clusters[j], CL_TRUE, 0, sizeof(int) * MAX_NUM_SPECTRA, valid_clusters[j].data(), 0, NULL, NULL); + err |= clEnqueueReadBuffer(q[j], buffer_num_valid_clusters[j], CL_TRUE, 0, sizeof(int), &num_valid_clusters[j], 0, NULL, NULL); + err |= clEnqueueReadBuffer(q[j], buffer_elements[j], CL_TRUE, 0, sizeof(int) * MAX_NUM_SPECTRA * CLUSTER_SIZE, elements[j].data(), 0, NULL, NULL); + err |= clEnqueueReadBuffer(q[j], buffer_num_elements[j], CL_TRUE, 0, sizeof(int) * MAX_NUM_SPECTRA, num_elements[j].data(), 0, NULL, NULL); + err |= clEnqueueReadBuffer(q[j], buffer_consensus[j], CL_TRUE, 0, sizeof(int) * MAX_NUM_SPECTRA, consensus[j].data(), 0, NULL, NULL); + err |= clEnqueueReadBuffer(q[j], buffer_num_consensus[j], CL_TRUE, 0, sizeof(int) * MAX_NUM_SPECTRA, num_consensus[j].data(), 0, NULL, NULL); + if (err != CL_SUCCESS) { + std::cerr << "Failed to read from buffers for kernel " << j << "." 
<< std::endl; + return EXIT_FAILURE; + } + } + + + // Write output file + for (int i = 0; i < NUM_KERNELS; i++) { + std::stringstream filename; + std::stringstream filename2; + filename << "Final_norm_new_combined_test.txt"; + filename2 << "Final_serial_combined_test.txt"; + print_clusters(elements[i].data(), num_elements[i].data(), consensus[i].data(), valid_clusters[i].data(), num_valid_clusters[i], filename2.str().c_str(), current_cluster_id, current_index_id); + + print_clusters_temp(elements[i].data(), num_elements[i].data(), consensus[i].data(), valid_clusters[i].data(), num_valid_clusters[i], filename.str().c_str()); + + current_cluster_id += num_valid_clusters[i]; + current_index_id += num_spectra_arr[(ii*NUM_KERNELS+i)]; + + } + } + + + + + } */ + + + + auto end_time = std::chrono::high_resolution_clock::now(); + auto total_duration = std::chrono::duration_cast(end_time - start_time); + auto kernel_duration = std::chrono::duration_cast(end_time - start_time2); + std::cout << "Total time: " << total_duration.count() << " ms\n"; + std::cout << "Clustering time: " << kernel_duration.count() << " ms\n"; + + + return 0; +} + + \ No newline at end of file diff --git a/regression/spectraflux/src/host/host.h b/regression/spectraflux/src/host/host.h new file mode 100644 index 0000000..47e1079 --- /dev/null +++ b/regression/spectraflux/src/host/host.h @@ -0,0 +1,256 @@ + + + +#include +#include +#include +#include +#include +//#include +#include +#include +#include + +#include +#include +#include +#include + + + + +#include "../kernel/hac.h" + + + + // const int NUM_BATCH = 51923; + const int mMAX_BATCH_SIZE = 1024; + const int NUM_BATCH = 2048; + + #define NUM_KERNELS N_CLUSTERING + #define NUM_KERNELS_ENCODING 1 + + const int clu_MAX_BATCH_SIZE = (mMAX_BATCH_SIZE+NUM_KERNELS)/NUM_KERNELS; + + + + + +void read_processed_csv_files(const std::string &input_file_mz, const std::string &input_file_intensity, + std::vector>> &spectra) +{ + std::ifstream 
infile_mz(input_file_mz); + std::ifstream infile_intensity(input_file_intensity); + std::string line_mz, line_intensity; + + while (std::getline(infile_mz, line_mz) && std::getline(infile_intensity, line_intensity)) + { + std::vector> spectrum; + std::istringstream iss_mz(line_mz), iss_intensity(line_intensity); + std::string token_mz, token_intensity; + + while (std::getline(iss_mz, token_mz, ',') && std::getline(iss_intensity, token_intensity, ',')) + { + float mz = std::stof(token_mz); + float intensity = std::stof(token_intensity); + + if(mz != -1.0f) { + spectrum.push_back({mz, intensity}); + } + } + + spectra.push_back(spectrum); + } +} + + + + + + +void read_input_data(const std::vector>> &spectra, + int *peak_mz_buffer, + int *peak_intensity_buffer, + int *peak_count_buffer, + int &start_spectra, + int num_spectra) { + + int end_spectra = start_spectra + num_spectra; + + for (int i = 0; i < num_spectra; i++) { + int peak_count = spectra[start_spectra + i].size(); + peak_count_buffer[i] = peak_count; + } + + for (int i = 0; i < num_spectra; i++) { + for (int j = 0; j < peak_count_buffer[i]; j++) { + float mz = spectra[start_spectra + i][j].first; + int quantized_mz = static_cast(2 * mz); + peak_mz_buffer[i * MAX_PEAKS + j] = quantized_mz; + float intensity = spectra[start_spectra + i][j].second; + int partitioned_intensity = static_cast(intensity * (Q - 1)); + peak_intensity_buffer[i * MAX_PEAKS + j] = partitioned_intensity; + } + } + + start_spectra = end_spectra; + + + +} + + + + + + +std::multimap load_bucket_sizes(const std::string &filename) { + std::multimap bucket_sizes; + std::ifstream file(filename); + + std::string line; + std::getline(file, line); + + while (std::getline(file, line)) { + std::istringstream ss(line); + std::string bucket_str, spectra_str; + + std::getline(ss, bucket_str, ','); + std::getline(ss, spectra_str, ','); + + int bucket = std::stoi(bucket_str); + int spectra = std::stoi(spectra_str); + + 
bucket_sizes.insert(std::make_pair(bucket, spectra)); + } + + return bucket_sizes; +} + + + + + + + + + +bitset_dhv read_encoded_vector(const std::string &line) { + bitset_dhv encoded_vector; + for (int i = 0; i < Dhv; i++) { + encoded_vector[i] = (line[i] == '1'); + } + return encoded_vector; +} + + + + + + +void read_hypervectors(const std::string &filename, std::vector &hypervectors) { + std::ifstream infile(filename); + if (!infile) { + std::cerr << "Error: Unable to open the input file '" << filename << "'." << std::endl; + return; + } + std::string line; + while (std::getline(infile, line)) { + hypervectors.push_back(read_encoded_vector(line)); + } + infile.close(); +} + + + + + + void print_clusters_temp( + const int *elements, + const int *num_elements, + const int *consensus, + const int *valid_clusters, + int num_valid_clusters, + const std::string& file_path){ + + std::ofstream file(file_path, std::ios_base::app); + + + + if (!file.is_open()) { + std::cerr << "Unable to open file: " << file_path << std::endl; + return; + } + + + // for (int i=0; i> results; + + + for (int i = 0; i < num_valid_clusters; i++) { + int cluster_idx = valid_clusters[i]; + for (int j = 0; j < num_elements[i]; j++) { + int element = elements[i * CLUSTER_SIZE + j]; + bool is_consensus = element == consensus[i]; + results.emplace_back(element+idx_offset, i + offset, is_consensus); + } + } + + + std::sort(results.begin(), results.end()); + + + for (const auto &result : results) { + file << std::get<0>(result) << ", " << std::get<1>(result) << ", " << (std::get<2>(result) ? 
"TRUE" : "FALSE") << std::endl; + } + + + file.close(); +} diff --git a/regression/spectraflux/src/host_p2p/host.cpp b/regression/spectraflux/src/host_p2p/host.cpp new file mode 100644 index 0000000..e6a073f --- /dev/null +++ b/regression/spectraflux/src/host_p2p/host.cpp @@ -0,0 +1,793 @@ + + +#include "host.h" + +decltype(&xclGetMemObjectFd) xcl::P2P::getMemObjectFd = nullptr; +decltype(&xclGetMemObjectFromFd) xcl::P2P::getMemObjectFromFd = nullptr; + +cl_program xcl_import_binary_file(cl_device_id device_id, cl_context context, const char* xclbin_file_name); +//================================================================================================== + + +int main(int argc, char **argv) +{ + auto start_time = std::chrono::high_resolution_clock::now(); + + std::cout <<"HV Width: " << sizeof(bitset_dhv) << "Byte" << std::endl; + + // OpenCL-related var + // cl_kernel kernels[NUM_KERNELS]; + // cl_kernel kernels_encoding[NUM_KERNELS_ENCODING]; + cl_int err; + cl_command_queue q[N_FPGA]; + cl_device_id device[N_FPGA]; + cl_context context[N_FPGA]; + cl_program program[N_FPGA]; + cl_platform_id platform; + cl_int ret; + cl_uint num_platforms; + + // Kernel's buffers on host + // std::vector> valid_clusters(NUM_KERNELS, std::vector(MAX_NUM_SPECTRA)); + std::vector> elements(NUM_KERNELS, std::vector(MAX_NUM_SPECTRA * clu_MAX_BATCH_SIZE, 0)); + std::vector> num_elements(NUM_KERNELS, std::vector(MAX_NUM_SPECTRA * clu_MAX_BATCH_SIZE, 0)); + std::vector> consensus(NUM_KERNELS, std::vector(MAX_NUM_SPECTRA * clu_MAX_BATCH_SIZE, 0)); + std::vector> num_valid_clusters(NUM_KERNELS, std::vector(clu_MAX_BATCH_SIZE, 0)); + // std::vector> num_consensus(NUM_KERNELS, std::vector(MAX_NUM_SPECTRA, 0)); + + std::vector num_spectra_arr(NUM_BATCH); + + std::vector> encoded_spectra_hbm(NUM_BATCH); + std::vector> encoded_spectra(NUM_BATCH); + + + // Read the bucket_size file + std::multimap bucket_sizes = load_bucket_sizes("./data/small1511_bucket.csv"); + // std::multimap 
bucket_sizes = load_bucket_sizes("/home/coder/Spec-HD/data/bucket_breakdown_1511_300.csv"); + std::vector> bucket_sizes_vec(bucket_sizes.begin(), bucket_sizes.end()); + + + for (int ii = 0; ii < NUM_BATCH; ii++) { + num_spectra_arr[ii] = bucket_sizes_vec[ii].second; + // num_valid_clusters[ii] = bucket_sizes_vec[ii].second; + } + + + // Read the CSV file + std::vector>> spectra; + auto start_timex = std::chrono::high_resolution_clock::now(); + // read_processed_csv_files("/home/coder/Spec-HD/data/spectra_mz_1511_1.csv", "/home/coder/Spec-HD/data/spectra_intensity_1511_1.csv", spectra); + read_processed_csv_files("./data/small1511_mz.csv", "./data/small1511_intensity.csv", spectra); + auto end_time_csv = std::chrono::high_resolution_clock::now(); + auto time_csv = std::chrono::duration_cast(end_time_csv - start_timex).count(); + std::cout << "Time to read CSV files: " << time_csv << " milliseconds" << std::endl; + + + // Parse the spectra file + std::vector> peak_mz_buffer(NUM_BATCH/mMAX_BATCH_SIZE+1, std::vector(mMAX_BATCH_SIZE * MAX_NUM_SPECTRA * MAX_PEAKS, 0)); + std::vector> peak_intensity_buffer(NUM_BATCH/mMAX_BATCH_SIZE+1, std::vector(mMAX_BATCH_SIZE * MAX_NUM_SPECTRA * MAX_PEAKS, 0)); + std::vector> peak_count_buffer(NUM_BATCH/mMAX_BATCH_SIZE+1, std::vector(mMAX_BATCH_SIZE * MAX_NUM_SPECTRA, 0)); + + auto start_timey = std::chrono::high_resolution_clock::now(); + int start_spectra = 0; + for (int i = 0; i < NUM_BATCH; i++) { + int idx = i / mMAX_BATCH_SIZE; + int offset = i % mMAX_BATCH_SIZE; + read_input_data(spectra, peak_mz_buffer[idx].data() + offset * MAX_NUM_SPECTRA * MAX_PEAKS, peak_intensity_buffer[idx].data() + offset * MAX_NUM_SPECTRA * MAX_PEAKS, + peak_count_buffer[idx].data() + offset * MAX_NUM_SPECTRA, start_spectra, num_spectra_arr[i]); + + // std::cout << start_spectra << std::endl; + } + auto end_time_loop = std::chrono::high_resolution_clock::now(); + auto time_loop = std::chrono::duration_cast(end_time_loop - start_timey).count(); + 
std::cout << "Time for loop: " << time_loop << " milliseconds" << std::endl; + std::cout << "spectra size" << spectra.size() << std::endl; + + + // Load the HD model + std::vector id_hypervectors; + std::vector level_hypervectors; + bitset_dhv id[f]; + bitset_dhv level[Q]; + bitset_dhv id_level[f+Q]; + + read_hypervectors("./data/ID_Hypervectors.txt", id_hypervectors); + read_hypervectors("./data/Level_Hypervectors.txt", level_hypervectors); + + + + for (int i = 0; i < id_hypervectors.size(); i++) { + // id[i] = id_hypervectors[i]; + id_level[i] = id_hypervectors[i]; + } + for (int i = 0; i < level_hypervectors.size(); i++) { + // level[i] = level_hypervectors[i]; + id_level[i+f] = level_hypervectors[i]; + } + assert(id_hypervectors.size()==f); + assert(level_hypervectors.size()==Q); + + + + if (argc < 3) + { + std::cout << "Usage: " << argv[0] << " \n"; + return 1; + } + std::string binaryFile[N_FPGA]; + for (int i=0; i platforms(num_platforms); + err = clGetPlatformIDs(num_platforms, platforms.data(), NULL); + if (err != CL_SUCCESS) + { + std::cerr << "Error: clGetPlatformIDs() failed with error code " << err << std::endl; + return EXIT_FAILURE; + } + + bool platform_found = false; + for (cl_uint i = 0; i < num_platforms; i++) + { + char platform_name[128]; + err = clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(platform_name), platform_name, NULL); + if (err != CL_SUCCESS) + { + std::cerr << "Error: clGetPlatformInfo() failed with error code " << err << std::endl; + return EXIT_FAILURE; + } + + if (std::string(platform_name).find("Xilinx") != std::string::npos) + { + platform = platforms[i]; + platform_found = true; + break; + } + } + + if (!platform_found) + { + std::cerr << "Error: Xilinx platform not found" << std::endl; + return EXIT_FAILURE; + } + + + cl_uint device_count; + + + + clGetDeviceIDs(platform, CL_DEVICE_TYPE_ACCELERATOR, N_FPGA, device, NULL); + + for (int i=0; i(XCL_MEM_TOPOLOGY | (i)), &id[0], 0}; + // // ext_buffer_level[i] = 
{static_cast(XCL_MEM_TOPOLOGY | (i)), &level[0], 0}; + // ext_buffer_id_level[i] = {static_cast(XCL_MEM_TOPOLOGY | (i)), &id_level[0], 0}; + // // ext_buffer_peak_mz[i] = {static_cast(XCL_MEM_TOPOLOGY | (i)), peak_mz_buffer[i].data(), 0}; + // ext_buffer_peak_mz[i] = {static_cast(XCL_MEM_TOPOLOGY | (i)), &peak_mz_buffer[i][0], 0}; + // ext_buffer_peak_intensity[i] = {static_cast(XCL_MEM_TOPOLOGY | (i)), peak_intensity_buffer[i].data(), 0}; + // ext_buffer_peak_count[i] = {static_cast(XCL_MEM_TOPOLOGY | (i)), peak_count_buffer[i].data(), 0}; + + // } + + for (int i=0; i < NUM_KERNELS-1; i++){ + ext_buffer_temp_krnl2[i] = {XCL_MEM_EXT_P2P_BUFFER, nullptr, 0}; + } + + + // for (int i = 0; i < NUM_KERNELS; i++) { + // ext_buffer_encoded_spectra[i] = {static_cast(XCL_MEM_TOPOLOGY | 4*(i)+1), &encoded_spectra[i][0], 0}; + // ext_buffer_consensus[i] = {static_cast(XCL_MEM_TOPOLOGY | 4*(i)+1), &consensus[i][0], 0}; + // ext_buffer_num_consensus[i] = {static_cast(XCL_MEM_TOPOLOGY | 4*(i)+1), &num_consensus[i][0], 0}; + // ext_buffer_valid_clusters[i] = {static_cast(XCL_MEM_TOPOLOGY | 4*(i)+1), &valid_clusters[i][0], 0}; + // ext_buffer_elements[i] = {static_cast(XCL_MEM_TOPOLOGY | 4*(i)+1), &elements[i][0], 0}; + // ext_buffer_num_elements[i] = {static_cast(XCL_MEM_TOPOLOGY | 4*(i)+1), &num_elements[i][0], 0}; + // ext_buffer_num_valid_clusters[i] = {static_cast(XCL_MEM_TOPOLOGY | 4*(i)+1), &num_valid_clusters[i], 0}; + // } + + + + + for (int i = 0; i < NUM_KERNELS_ENCODING; i++) { + // buffer_id[i] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_EXT_PTR_XILINX, sizeof(bitset_dhv) * f, &ext_buffer_id[i], &err); + // buffer_level[i] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_EXT_PTR_XILINX, sizeof(bitset_dhv) * Q, &ext_buffer_level[i], &err); + buffer_id_level[i] = clCreateBuffer(context[0], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(bitset_dhv) * (f+Q), &id_level[0], &err); + + buffer_peak_mz[i] = 
clCreateBuffer(context[0], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * mMAX_BATCH_SIZE * MAX_NUM_SPECTRA * MAX_PEAKS, peak_mz_buffer[i].data(), &err); + + buffer_peak_intensity[i] = clCreateBuffer(context[0], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * mMAX_BATCH_SIZE * MAX_NUM_SPECTRA * MAX_PEAKS, peak_intensity_buffer[i].data(), &err); + + buffer_peak_count[i] = clCreateBuffer(context[0], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * mMAX_BATCH_SIZE * MAX_NUM_SPECTRA, peak_count_buffer[i].data(), &err); + // buffer_encoded_spectra_hbm[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(bitset_dhv)*MAX_NUM_SPECTRA*NUM_BATCH, &encoded_spectra_hbm[i][0], &err); + buffer_num_spectra[i] = clCreateBuffer(context[0], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * mMAX_BATCH_SIZE, num_spectra_arr.data(), &err); + + if (err != CL_SUCCESS) { + std::cerr << "Failed to create buffers for kernel NUM_KERNELS_ENCODING" << i << ". Error code: " << err << std::endl; + return EXIT_FAILURE; + } + } + + std::cout<<"1111"<0], CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * consensus[i].size(), consensus[i].data(), &err); + // buffer_num_consensus[i] = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_EXT_PTR_XILINX, sizeof(int) * num_consensus[i].size(), &ext_buffer_num_consensus[i], &err); + // buffer_valid_clusters[i] = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_EXT_PTR_XILINX, sizeof(int) * valid_clusters[i].size(), valid_clusters[i].data(), &err); + buffer_elements[i] = clCreateBuffer(context[i>0], CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * elements[i].size(), elements[i].data(), &err); + buffer_num_elements[i] = clCreateBuffer(context[i>0], CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * num_elements[i].size(), num_elements[i].data(), &err); + buffer_num_valid_clusters[i] = clCreateBuffer(context[i>0], CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) 
* num_valid_clusters[i].size(), num_valid_clusters[i].data(), &err); + + if (err != CL_SUCCESS) { + std::cerr << "Failed to create buffers for kernel NUM_KERNELS" << i << ". Error code: " << err << std::endl; + return EXIT_FAILURE; + } + } + + std::cout<<"1111"< Host + for (int j = 0; j < NUM_KERNELS; j++) { + // err = clEnqueueReadBuffer(q[j], buffer_valid_clusters[j], CL_TRUE, 0, sizeof(int) * MAX_NUM_SPECTRA, valid_clusters[j].data(), 0, NULL, NULL); + err |= clEnqueueReadBuffer(q[j], buffer_num_valid_clusters[j], CL_TRUE, 0, sizeof(int), &num_valid_clusters[j], 0, NULL, NULL); + err |= clEnqueueReadBuffer(q[j], buffer_elements[j], CL_TRUE, 0, sizeof(int) * MAX_NUM_SPECTRA * CLUSTER_SIZE, elements[j].data(), 0, NULL, NULL); + err |= clEnqueueReadBuffer(q[j], buffer_num_elements[j], CL_TRUE, 0, sizeof(int) * MAX_NUM_SPECTRA, num_elements[j].data(), 0, NULL, NULL); + err |= clEnqueueReadBuffer(q[j], buffer_consensus[j], CL_TRUE, 0, sizeof(int) * MAX_NUM_SPECTRA, consensus[j].data(), 0, NULL, NULL); + // err |= clEnqueueReadBuffer(q[j], buffer_num_consensus[j], CL_TRUE, 0, sizeof(int) * MAX_NUM_SPECTRA, num_consensus[j].data(), 0, NULL, NULL); + if (err != CL_SUCCESS) { + std::cerr << "Failed to read from buffers for kernel " << j << "." 
<< std::endl; + return EXIT_FAILURE; + } + } + for (int j = 0; j < NUM_KERNELS; j++) { + clFinish(q[j]); + } + + + + // Write output file + // for (int i = 0; i < NUM_KERNELS; i++) { + // std::stringstream filename; + // std::stringstream filename2; + // filename << "Final_norm_new_combined_test.txt"; + // filename2 << "Final_serial_combined_test.txt"; + // print_clusters(elements[i].data(), num_elements[i].data(), consensus[i].data(), valid_clusters[i].data(), num_valid_clusters[i], filename2.str().c_str(), current_cluster_id, current_index_id); + + // print_clusters_temp(elements[i].data(), num_elements[i].data(), consensus[i].data(), valid_clusters[i].data(), num_valid_clusters[i], filename.str().c_str()); + + // current_cluster_id += num_valid_clusters[i]; + // current_index_id += num_spectra_arr[(ii*NUM_KERNELS+i)]; + + // } + } + + + + } + +FINISH: +//********************************************* Run clustering ********************************************* + + /*for (int ii = 0; ii < NUM_BATCH/NUM_KERNELS; ii++) { + cl_event kernel_events[NUM_KERNELS]; + + for (int j = 0; j < NUM_KERNELS; j++) { + + err |= clSetKernelArg(kernels[j], 6, sizeof(cl_mem), &buffer_encoded_spectra[j]); + err |= clSetKernelArg(kernels[j], 7, sizeof(int), &num_spectra_arr[(ii*NUM_KERNELS+j)]); + err |= clSetKernelArg(kernels[j], 8, sizeof(cl_mem), &buffer_valid_clusters[j]); + err |= clSetKernelArg(kernels[j], 9, sizeof(cl_mem), &buffer_num_valid_clusters[j]); + err |= clSetKernelArg(kernels[j], 10, sizeof(cl_mem), &buffer_elements[j]); + err |= clSetKernelArg(kernels[j], 11, sizeof(cl_mem), &buffer_num_elements[j]); + err |= clSetKernelArg(kernels[j], 12, sizeof(cl_mem), &buffer_consensus[j]); + err |= clSetKernelArg(kernels[j], 13, sizeof(cl_mem), &buffer_num_consensus[j]); + int batch_size = 0; + err |= clSetKernelArg(kernels_encoding[j], 5, sizeof(int), &batch_size); + + + if (err != CL_SUCCESS) { + std::cerr << "Failed to set kernel arguments for kernel " << j << std::endl; + 
return EXIT_FAILURE; + } + + std::cout << ii * NUM_KERNELS + j < Host + for (int j = 0; j < NUM_KERNELS; j++) { + err = clEnqueueReadBuffer(q[j], buffer_valid_clusters[j], CL_TRUE, 0, sizeof(int) * MAX_NUM_SPECTRA, valid_clusters[j].data(), 0, NULL, NULL); + err |= clEnqueueReadBuffer(q[j], buffer_num_valid_clusters[j], CL_TRUE, 0, sizeof(int), &num_valid_clusters[j], 0, NULL, NULL); + err |= clEnqueueReadBuffer(q[j], buffer_elements[j], CL_TRUE, 0, sizeof(int) * MAX_NUM_SPECTRA * CLUSTER_SIZE, elements[j].data(), 0, NULL, NULL); + err |= clEnqueueReadBuffer(q[j], buffer_num_elements[j], CL_TRUE, 0, sizeof(int) * MAX_NUM_SPECTRA, num_elements[j].data(), 0, NULL, NULL); + err |= clEnqueueReadBuffer(q[j], buffer_consensus[j], CL_TRUE, 0, sizeof(int) * MAX_NUM_SPECTRA, consensus[j].data(), 0, NULL, NULL); + err |= clEnqueueReadBuffer(q[j], buffer_num_consensus[j], CL_TRUE, 0, sizeof(int) * MAX_NUM_SPECTRA, num_consensus[j].data(), 0, NULL, NULL); + if (err != CL_SUCCESS) { + std::cerr << "Failed to read from buffers for kernel " << j << "." 
<< std::endl; + return EXIT_FAILURE; + } + } + + + // Write output file + for (int i = 0; i < NUM_KERNELS; i++) { + std::stringstream filename; + std::stringstream filename2; + filename << "Final_norm_new_combined_test.txt"; + filename2 << "Final_serial_combined_test.txt"; + print_clusters(elements[i].data(), num_elements[i].data(), consensus[i].data(), valid_clusters[i].data(), num_valid_clusters[i], filename2.str().c_str(), current_cluster_id, current_index_id); + + print_clusters_temp(elements[i].data(), num_elements[i].data(), consensus[i].data(), valid_clusters[i].data(), num_valid_clusters[i], filename.str().c_str()); + + current_cluster_id += num_valid_clusters[i]; + current_index_id += num_spectra_arr[(ii*NUM_KERNELS+i)]; + + } + } + + + + + } */ + + + + auto end_time = std::chrono::high_resolution_clock::now(); + auto total_duration = std::chrono::duration_cast(end_time - start_time); + auto kernel_duration = std::chrono::duration_cast(end_time - start_time2); + std::cout << "Total time: " << total_duration.count() << " ms\n"; + std::cout << "Clustering time: " << kernel_duration.count() << " ms\n"; + + + return 0; + +} + + + +// ============================ Helper Functions +// ========================================= +static int load_file_to_memory(const char* filename, char** result); +cl_program xcl_import_binary_file(cl_device_id device_id, cl_context context, const char* xclbin_file_name) { + int err; + + std::cout << "INFO: Importing " << xclbin_file_name << std::endl; + + if (access(xclbin_file_name, R_OK) != 0) { + return nullptr; + std::cerr << "ERROR: " << xclbin_file_name << "xclbin not available please build\n"; + exit(EXIT_FAILURE); + } + + char* krnl_bin; + const size_t krnl_size = load_file_to_memory(xclbin_file_name, &krnl_bin); + std::cout << "INFO: Loaded file\n"; + + cl_program program = + clCreateProgramWithBinary(context, 1, &device_id, &krnl_size, (const unsigned char**)&krnl_bin, nullptr, &err); + if ((!program) || (err != 
CL_SUCCESS)) { + std::cout << "Error: Failed to create compute program from binary " << err << std::endl; + std::cerr << "Test failed\n"; + exit(EXIT_FAILURE); + } + + std::cout << "INFO: Created Binary\n"; + + err = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr); + if (err != CL_SUCCESS) { + size_t len; + char buffer[2048]; + + clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); + std::cout << buffer << std::endl; + std::cerr << "Error: Failed to build program executable!\n"; + exit(EXIT_FAILURE); + } + + std::cout << "INFO: Built Program\n"; + + free(krnl_bin); + + return program; +} + +static void* smalloc(size_t size) { + void* ptr; + + ptr = malloc(size); + + if (ptr == nullptr) { + std::cerr << "Error: Cannot allocate memory\n"; + exit(EXIT_FAILURE); + } + return ptr; +} +static int load_file_to_memory(const char* filename, char** result) { + unsigned int size; + + FILE* _f = fopen(filename, "rb"); + if (_f == nullptr) { + *result = nullptr; + std::cerr << "Error: Could not read file" << filename << std::endl; + exit(EXIT_FAILURE); + } + + fseek(_f, 0, SEEK_END); + size = ftell(_f); + fseek(_f, 0, SEEK_SET); + + *result = (char*)smalloc(sizeof(char) * (size + 1)); + + if (size != fread(*result, sizeof(char), size, _f)) { + free(*result); + std::cerr << "Error: read of kernel failed\n"; + exit(EXIT_FAILURE); + } + + fclose(_f); + (*result)[size] = 0; + + return size; +} diff --git a/regression/spectraflux/src/host_p2p/host.h b/regression/spectraflux/src/host_p2p/host.h new file mode 100644 index 0000000..4c6affc --- /dev/null +++ b/regression/spectraflux/src/host_p2p/host.h @@ -0,0 +1,273 @@ + + + +#include +#include +#include +#include +#include +//#include +#include +#include +#include +// #include "xcl2.hpp" +#include "./xcl2/xcl2.hpp" + +#include +#include +#include +#include +// #include +// #include + +#include "tqdm.hpp" + +#include "../kernel/hac.h" + + + +const int NUM_BATCH = 51923; +const 
int mMAX_BATCH_SIZE = 4096; + +#define NUM_KERNELS N_CLUSTERING +#define NUM_KERNELS_ENCODING 1 + +#define N_FPGA 2 + +const int clu_MAX_BATCH_SIZE = (mMAX_BATCH_SIZE+NUM_KERNELS)/NUM_KERNELS; + + + + + + +void read_processed_csv_files(const std::string &input_file_mz, const std::string &input_file_intensity, + std::vector>> &spectra) +{ + std::ifstream infile_mz(input_file_mz); + std::ifstream infile_intensity(input_file_intensity); + std::string line_mz, line_intensity; + + int nt = 0; + while (std::getline(infile_mz, line_mz) && std::getline(infile_intensity, line_intensity)) nt++; + infile_mz.clear(); + infile_intensity.clear(); + infile_mz.seekg(0, std::ios::beg); + infile_intensity.seekg(0, std::ios::beg); + // std::cout<> spectrum; + std::istringstream iss_mz(line_mz), iss_intensity(line_intensity); + std::string token_mz, token_intensity; + + while (std::getline(iss_mz, token_mz, ',') && std::getline(iss_intensity, token_intensity, ',')) + { + float mz = std::stof(token_mz); + float intensity = std::stof(token_intensity); + + if(mz != -1.0f) { + spectrum.push_back({mz, intensity}); + } + } + + spectra.push_back(spectrum); + nt --; + } + std::cout<<"&*&&" << nt<>> &spectra, + int *peak_mz_buffer, + int *peak_intensity_buffer, + int *peak_count_buffer, + int &start_spectra, + int num_spectra) { + + int end_spectra = start_spectra + num_spectra; + + for (int i = 0; i < num_spectra; i++) { + int peak_count = spectra[start_spectra + i].size(); + peak_count_buffer[i] = peak_count; + } + + for (int i = 0; i < num_spectra; i++) { + for (int j = 0; j < peak_count_buffer[i]; j++) { + float mz = spectra[start_spectra + i][j].first; + int quantized_mz = static_cast(2 * mz); + peak_mz_buffer[i * MAX_PEAKS + j] = quantized_mz; + float intensity = spectra[start_spectra + i][j].second; + int partitioned_intensity = static_cast(intensity * (Q - 1)); + peak_intensity_buffer[i * MAX_PEAKS + j] = partitioned_intensity; + } + } + + start_spectra = end_spectra; + + + +} + + + + 
+ + +std::multimap load_bucket_sizes(const std::string &filename) { + std::multimap bucket_sizes; + std::ifstream file(filename); + + std::string line; + std::getline(file, line); + + while (std::getline(file, line)) { + std::istringstream ss(line); + std::string bucket_str, spectra_str; + + std::getline(ss, bucket_str, ','); + std::getline(ss, spectra_str, ','); + + int bucket = std::stoi(bucket_str); + int spectra = std::stoi(spectra_str); + + bucket_sizes.insert(std::make_pair(bucket, spectra)); + } + + return bucket_sizes; +} + + + + + + + + + +bitset_dhv read_encoded_vector(const std::string &line) { + bitset_dhv encoded_vector; + for (int i = 0; i < Dhv; i++) { + encoded_vector[i] = (line[i] == '1'); + } + return encoded_vector; +} + + + + + + +void read_hypervectors(const std::string &filename, std::vector &hypervectors) { + std::ifstream infile(filename); + if (!infile) { + std::cerr << "Error: Unable to open the input file '" << filename << "'." << std::endl; + return; + } + std::string line; + while (std::getline(infile, line)) { + hypervectors.push_back(read_encoded_vector(line)); + } + infile.close(); +} + + + + + + void print_clusters_temp( + const int *elements, + const int *num_elements, + const int *consensus, + const int *valid_clusters, + int num_valid_clusters, + const std::string& file_path){ + + std::ofstream file(file_path, std::ios_base::app); + + + + if (!file.is_open()) { + std::cerr << "Unable to open file: " << file_path << std::endl; + return; + } + + + // for (int i=0; i> results; + + + for (int i = 0; i < num_valid_clusters; i++) { + int cluster_idx = valid_clusters[i]; + for (int j = 0; j < num_elements[i]; j++) { + int element = elements[i * CLUSTER_SIZE + j]; + bool is_consensus = element == consensus[i]; + results.emplace_back(element+idx_offset, i + offset, is_consensus); + } + } + + + std::sort(results.begin(), results.end()); + + + for (const auto &result : results) { + file << std::get<0>(result) << ", " << std::get<1>(result) 
<< ", " << (std::get<2>(result) ? "TRUE" : "FALSE") << std::endl; + } + + + file.close(); +} \ No newline at end of file diff --git a/regression/spectraflux/src/host_p2p/tqdm.hpp b/regression/spectraflux/src/host_p2p/tqdm.hpp new file mode 100644 index 0000000..2b7e8fb --- /dev/null +++ b/regression/spectraflux/src/host_p2p/tqdm.hpp @@ -0,0 +1,574 @@ +#pragma once + +/* + *Copyright (c) 2018-2019 + * + *Permission is hereby granted, free of charge, to any person + *obtaining a copy of this software and associated documentation + *files (the "Software"), to deal in the Software without + *restriction, including without limitation the rights to use, + *copy, modify, merge, publish, distribute, sublicense, and/or sell + *copies of the Software, and to permit persons to whom the + *Software is furnished to do so, subject to the following + *conditions: + * + *The above copyright notice and this permission notice shall be + *included in all copies or substantial portions of the Software. + * + *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + *EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + *OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + *NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + *HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + *WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + *FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + *OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +// -------------------- chrono stuff -------------------- + +namespace tq +{ +using index = std::ptrdiff_t; // maybe std::size_t, but I hate unsigned types. 
+using time_point_t = std::chrono::time_point; + +inline double elapsed_seconds(time_point_t from, time_point_t to) +{ + using seconds = std::chrono::duration; + return std::chrono::duration_cast(to - from).count(); +} + +class Chronometer +{ +public: + Chronometer() : start_(std::chrono::steady_clock::now()) {} + + double reset() + { + auto previous = start_; + start_ = std::chrono::steady_clock::now(); + + return elapsed_seconds(previous, start_); + } + + [[nodiscard]] double peek() const + { + auto now = std::chrono::steady_clock::now(); + + return elapsed_seconds(start_, now); + } + + [[nodiscard]] time_point_t get_start() const { return start_; } + +private: + time_point_t start_; +}; + +// -------------------- progress_bar -------------------- +inline void clamp(double& x, double a, double b) +{ + if (x < a) x = a; + if (x > b) x = b; +} + +class progress_bar +{ +public: + void restart() + { + chronometer_.reset(); + refresh_.reset(); + } + + void update(double progress) + { + clamp(progress, 0, 1); + + if (time_since_refresh() > min_time_per_update_ || progress == 0 || + progress == 1) + { + reset_refresh_timer(); + display(progress); + } + suffix_.str(""); + } + + void set_ostream(std::ostream& os) { os_ = &os; } + void set_prefix(std::string s) { prefix_ = std::move(s); } + void set_bar_size(int size) { bar_size_ = size; } + void set_min_update_time(double time) { min_time_per_update_ = time; } + + template + progress_bar& operator<<(const T& t) + { + suffix_ << t; + return *this; + } + + double elapsed_time() const { return chronometer_.peek(); } + +private: + void display(double progress) + { + auto flags = os_->flags(); + + double t = chronometer_.peek(); + double eta = t/progress - t; + + std::stringstream bar; + + bar << '\r' << prefix_ << '{' << std::fixed << std::setprecision(1) + << std::setw(5) << 100*progress << "%} "; + + print_bar(bar, progress); + + bar << " (" << t << "s < " << eta << "s) "; + + std::string sbar = bar.str(); + std::string 
suffix = suffix_.str(); + + index out_size = sbar.size() + suffix.size(); + term_cols_ = std::max(term_cols_, out_size); + index num_blank = term_cols_ - out_size; + + (*os_) << sbar << suffix << std::string(num_blank, ' ') << std::flush; + + os_->flags(flags); + } + + void print_bar(std::stringstream& ss, double filled) const + { + auto num_filled = static_cast(std::round(filled*bar_size_)); + ss << '[' << std::string(num_filled, '#') + << std::string(bar_size_ - num_filled, ' ') << ']'; + } + + double time_since_refresh() const { return refresh_.peek(); } + void reset_refresh_timer() { refresh_.reset(); } + + Chronometer chronometer_{}; + Chronometer refresh_{}; + double min_time_per_update_{0.15}; // found experimentally + + std::ostream* os_{&std::cerr}; + + index bar_size_{40}; + index term_cols_{1}; + + std::string prefix_{}; + std::stringstream suffix_{}; +}; + +// -------------------- iter_wrapper -------------------- + +template +class iter_wrapper +{ +public: + using iterator_category = typename ForwardIter::iterator_category; + using value_type = typename ForwardIter::value_type; + using difference_type = typename ForwardIter::difference_type; + using pointer = typename ForwardIter::pointer; + using reference = typename ForwardIter::reference; + + iter_wrapper(ForwardIter it, Parent* parent) : current_(it), parent_(parent) + {} + + auto operator*() { return *current_; } + + void operator++() { ++current_; } + + template + bool operator!=(const Other& other) const + { + parent_->update(); // here and not in ++ because I need to run update + // before first advancement! + return current_ != other; + } + + bool operator!=(const iter_wrapper& other) const + { + parent_->update(); // here and not in ++ because I need to run update + // before first advancement! 
+ return current_ != other.current_; + } + + [[nodiscard]] const ForwardIter& get() const { return current_; } + +private: + friend Parent; + ForwardIter current_; + Parent* parent_; +}; + +// -------------------- tqdm_for_lvalues -------------------- + +template +class tqdm_for_lvalues +{ +public: + using this_t = tqdm_for_lvalues; + using iterator = iter_wrapper; + using value_type = typename ForwardIter::value_type; + using size_type = index; + using difference_type = index; + + tqdm_for_lvalues(ForwardIter begin, EndIter end) + : first_(begin, this), last_(end), num_iters_(std::distance(begin, end)) + {} + + tqdm_for_lvalues(ForwardIter begin, EndIter end, index total) + : first_(begin, this), last_(end), num_iters_(total) + {} + + template + explicit tqdm_for_lvalues(Container& C) + : first_(C.begin(), this), last_(C.end()), num_iters_(C.size()) + {} + + template + explicit tqdm_for_lvalues(const Container& C) + : first_(C.begin(), this), last_(C.end()), num_iters_(C.size()) + {} + + tqdm_for_lvalues(const tqdm_for_lvalues&) = delete; + tqdm_for_lvalues(tqdm_for_lvalues&&) = delete; + tqdm_for_lvalues& operator=(tqdm_for_lvalues&&) = delete; + tqdm_for_lvalues& operator=(const tqdm_for_lvalues&) = delete; + ~tqdm_for_lvalues() = default; + + template + tqdm_for_lvalues(Container&&) = delete; // prevent misuse! 
+ + iterator begin() + { + bar_.restart(); + iters_done_ = 0; + return first_; + } + + EndIter end() const { return last_; } + + void update() + { + ++iters_done_; + bar_.update(calc_progress()); + } + + void set_ostream(std::ostream& os) { bar_.set_ostream(os); } + void set_prefix(std::string s) { bar_.set_prefix(std::move(s)); } + void set_bar_size(int size) { bar_.set_bar_size(size); } + void set_min_update_time(double time) { bar_.set_min_update_time(time); } + + template + tqdm_for_lvalues& operator<<(const T& t) + { + bar_ << t; + return *this; + } + + void manually_set_progress(double to) + { + clamp(to, 0, 1); + iters_done_ = std::round(to*num_iters_); + } + +private: + double calc_progress() const + { + double denominator = num_iters_; + if (num_iters_ == 0) denominator += 1e-9; + return iters_done_/denominator; + } + + iterator first_; + EndIter last_; + index num_iters_{0}; + index iters_done_{0}; + progress_bar bar_; +}; + +template +tqdm_for_lvalues(Container&) -> tqdm_for_lvalues; + +template +tqdm_for_lvalues(const Container&) + -> tqdm_for_lvalues; + +// -------------------- tqdm_for_rvalues -------------------- + +template +class tqdm_for_rvalues +{ +public: + using iterator = typename Container::iterator; + using const_iterator = typename Container::const_iterator; + using value_type = typename Container::value_type; + + explicit tqdm_for_rvalues(Container&& C) + : C_(std::forward(C)), tqdm_(C_) + {} + + auto begin() { return tqdm_.begin(); } + + auto end() { return tqdm_.end(); } + + void update() { return tqdm_.update(); } + + void set_ostream(std::ostream& os) { tqdm_.set_ostream(os); } + void set_prefix(std::string s) { tqdm_.set_prefix(std::move(s)); } + void set_bar_size(int size) { tqdm_.set_bar_size(size); } + void set_min_update_time(double time) { tqdm_.set_min_update_time(time); } + + template + auto& operator<<(const T& t) + { + return tqdm_ << t; + } + + void advance(index amount) { tqdm_.advance(amount); } + + void 
manually_set_progress(double to) { tqdm_.manually_set_progress(to); } + +private: + Container C_; + tqdm_for_lvalues tqdm_; +}; + +template +tqdm_for_rvalues(Container &&) -> tqdm_for_rvalues; + +// -------------------- tqdm -------------------- +template +auto tqdm(const ForwardIter& first, const ForwardIter& last) +{ + return tqdm_for_lvalues(first, last); +} + +template +auto tqdm(const ForwardIter& first, const ForwardIter& last, index total) +{ + return tqdm_for_lvalues(first, last, total); +} + +template +auto tqdm(const Container& C) +{ + return tqdm_for_lvalues(C); +} + +template +auto tqdm(Container& C) +{ + return tqdm_for_lvalues(C); +} + +template +auto tqdm(Container&& C) +{ + return tqdm_for_rvalues(std::forward(C)); +} + +// -------------------- int_iterator -------------------- + +template +class int_iterator +{ +public: + using iterator_category = std::random_access_iterator_tag; + using value_type = IntType; + using difference_type = IntType; + using pointer = IntType*; + using reference = IntType&; + + explicit int_iterator(IntType val) : value_(val) {} + + IntType& operator*() { return value_; } + + int_iterator& operator++() + { + ++value_; + return *this; + } + int_iterator& operator--() + { + --value_; + return *this; + } + + int_iterator& operator+=(difference_type d) + { + value_ += d; + return *this; + } + + difference_type operator-(const int_iterator& other) const + { + return value_ - other.value_; + } + + bool operator!=(const int_iterator& other) const + { + return value_ != other.value_; + } + +private: + IntType value_; +}; + +// -------------------- range -------------------- +template +class range +{ +public: + using iterator = int_iterator; + using const_iterator = iterator; + using value_type = IntType; + + range(IntType first, IntType last) : first_(first), last_(last) {} + explicit range(IntType last) : first_(0), last_(last) {} + + [[nodiscard]] iterator begin() const { return first_; } + [[nodiscard]] iterator end() const { 
return last_; } + [[nodiscard]] index size() const { return last_ - first_; } + +private: + iterator first_; + iterator last_; +}; + +template +auto trange(IntType first, IntType last) +{ + return tqdm(range(first, last)); +} + +template +auto trange(IntType last) +{ + return tqdm(range(last)); +} + +// -------------------- timing_iterator -------------------- + +class timing_iterator_end_sentinel +{ +public: + explicit timing_iterator_end_sentinel(double num_seconds) + : num_seconds_(num_seconds) + {} + + [[nodiscard]] double num_seconds() const { return num_seconds_; } + +private: + double num_seconds_; +}; + +class timing_iterator +{ +public: + using iterator_category = std::forward_iterator_tag; + using value_type = double; + using difference_type = double; + using pointer = double*; + using reference = double&; + + double operator*() const { return chrono_.peek(); } + + timing_iterator& operator++() { return *this; } + + bool operator!=(const timing_iterator_end_sentinel& other) const + { + return chrono_.peek() < other.num_seconds(); + } + +private: + tq::Chronometer chrono_; +}; + +// -------------------- timer ------------------- +struct timer +{ +public: + using iterator = timing_iterator; + using end_iterator = timing_iterator_end_sentinel; + using const_iterator = iterator; + using value_type = double; + + explicit timer(double num_seconds) : num_seconds_(num_seconds) {} + + [[nodiscard]] static iterator begin() { return iterator(); } + [[nodiscard]] end_iterator end() const + { + return end_iterator(num_seconds_); + } + + [[nodiscard]] double num_seconds() const { return num_seconds_; } + +private: + double num_seconds_; +}; + +class tqdm_timer +{ +public: + using iterator = iter_wrapper; + using end_iterator = timer::end_iterator; + using value_type = typename timing_iterator::value_type; + using size_type = index; + using difference_type = index; + + explicit tqdm_timer(double num_seconds) : num_seconds_(num_seconds) {} + + tqdm_timer(const 
tqdm_timer&) = delete; + tqdm_timer(tqdm_timer&&) = delete; + tqdm_timer& operator=(tqdm_timer&&) = delete; + tqdm_timer& operator=(const tqdm_timer&) = delete; + ~tqdm_timer() = default; + + template + tqdm_timer(Container&&) = delete; // prevent misuse! + + iterator begin() + { + bar_.restart(); + return iterator(timing_iterator(), this); + } + + end_iterator end() const { return end_iterator(num_seconds_); } + + void update() + { + double t = bar_.elapsed_time(); + + bar_.update(t/num_seconds_); + } + + void set_ostream(std::ostream& os) { bar_.set_ostream(os); } + void set_prefix(std::string s) { bar_.set_prefix(std::move(s)); } + void set_bar_size(int size) { bar_.set_bar_size(size); } + void set_min_update_time(double time) { bar_.set_min_update_time(time); } + + template + tqdm_timer& operator<<(const T& t) + { + bar_ << t; + return *this; + } + +private: + double num_seconds_; + progress_bar bar_; +}; + +inline auto tqdm(timer t) { return tqdm_timer(t.num_seconds()); } + +} // namespace tqdm \ No newline at end of file diff --git a/regression/spectraflux/src/host_p2p/xcl2/xcl2.cpp b/regression/spectraflux/src/host_p2p/xcl2/xcl2.cpp new file mode 100644 index 0000000..13aad1c --- /dev/null +++ b/regression/spectraflux/src/host_p2p/xcl2/xcl2.cpp @@ -0,0 +1,180 @@ +/** +* Copyright (C) 2019-2021 Xilinx, Inc +* +* Licensed under the Apache License, Version 2.0 (the "License"). You may +* not use this file except in compliance with the License. A copy of the +* License is located at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +* License for the specific language governing permissions and limitations +* under the License. 
+*/ + +#include "xcl2.hpp" +#include +#include +#include +#include +#include +#if defined(_WINDOWS) +#include +#else +#include +#endif + +namespace xcl { +std::vector get_devices(const std::string& vendor_name) { + size_t i; + cl_int err; + std::vector platforms; + OCL_CHECK(err, err = cl::Platform::get(&platforms)); + cl::Platform platform; + for (i = 0; i < platforms.size(); i++) { + platform = platforms[i]; + OCL_CHECK(err, std::string platformName = platform.getInfo(&err)); + if (!(platformName.compare(vendor_name))) { + std::cout << "Found Platform" << std::endl; + std::cout << "Platform Name: " << platformName.c_str() << std::endl; + break; + } + } + if (i == platforms.size()) { + std::cout << "Error: Failed to find Xilinx platform" << std::endl; + std::cout << "Found the following platforms : " << std::endl; + for (size_t j = 0; j < platforms.size(); j++) { + platform = platforms[j]; + OCL_CHECK(err, std::string platformName = platform.getInfo(&err)); + std::cout << "Platform Name: " << platformName.c_str() << std::endl; + } + exit(EXIT_FAILURE); + } + // Getting ACCELERATOR Devices and selecting 1st such device + std::vector devices; + OCL_CHECK(err, err = platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices)); + return devices; +} + +std::vector get_xil_devices() { + return get_devices("Xilinx"); +} + +cl::Device find_device_bdf(const std::vector& devices, const std::string& bdf) { + char device_bdf[20]; + cl_int err; + cl::Device device; + int cnt = 0; + for (uint32_t i = 0; i < devices.size(); i++) { + OCL_CHECK(err, err = devices[i].getInfo(CL_DEVICE_PCIE_BDF, &device_bdf)); + if (bdf == device_bdf) { + device = devices[i]; + cnt++; + break; + } + } + if (cnt == 0) { + std::cout << "Invalid device bdf. 
Please check and provide valid bdf\n"; + exit(EXIT_FAILURE); + } + return device; +} +cl_device_id find_device_bdf_c(cl_device_id* devices, const std::string& bdf, cl_uint device_count) { + char device_bdf[20]; + cl_int err; + cl_device_id device; + int cnt = 0; + for (uint32_t i = 0; i < device_count; i++) { + err = clGetDeviceInfo(devices[i], CL_DEVICE_PCIE_BDF, sizeof(device_bdf), device_bdf, 0); + if (err != CL_SUCCESS) { + std::cout << "Unable to extract the device BDF details\n"; + exit(EXIT_FAILURE); + } + if (bdf == device_bdf) { + device = devices[i]; + cnt++; + break; + } + } + if (cnt == 0) { + std::cout << "Invalid device bdf. Please check and provide valid bdf\n"; + exit(EXIT_FAILURE); + } + return device; +} +std::vector read_binary_file(const std::string& xclbin_file_name) { + std::cout << "INFO: Reading " << xclbin_file_name << std::endl; + FILE* fp; + if ((fp = fopen(xclbin_file_name.c_str(), "r")) == nullptr) { + printf("ERROR: %s xclbin not available please build\n", xclbin_file_name.c_str()); + exit(EXIT_FAILURE); + } + // Loading XCL Bin into char buffer + std::cout << "Loading: '" << xclbin_file_name.c_str() << "'\n"; + std::ifstream bin_file(xclbin_file_name.c_str(), std::ifstream::binary); + bin_file.seekg(0, bin_file.end); + auto nb = bin_file.tellg(); + bin_file.seekg(0, bin_file.beg); + std::vector buf; + buf.resize(nb); + bin_file.read(reinterpret_cast(buf.data()), nb); + return buf; +} + +bool is_emulation() { + bool ret = false; + char* xcl_mode = getenv("XCL_EMULATION_MODE"); + if (xcl_mode != nullptr) { + ret = true; + } + return ret; +} + +bool is_hw_emulation() { + bool ret = false; + char* xcl_mode = getenv("XCL_EMULATION_MODE"); + if ((xcl_mode != nullptr) && !strcmp(xcl_mode, "hw_emu")) { + ret = true; + } + return ret; +} +double round_off(double n) { + double d = n * 100.0; + int i = d + 0.5; + d = i / 100.0; + return d; +} + +std::string convert_size(size_t size) { + static const char* SIZES[] = {"B", "KB", "MB", "GB"}; + 
uint32_t div = 0; + size_t rem = 0; + + while (size >= 1024 && div < (sizeof SIZES / sizeof *SIZES)) { + rem = (size % 1024); + div++; + size /= 1024; + } + + double size_d = (float)size + (float)rem / 1024.0; + double size_val = round_off(size_d); + + std::stringstream stream; + stream << std::fixed << std::setprecision(2) << size_val; + std::string size_str = stream.str(); + std::string result = size_str + " " + SIZES[div]; + return result; +} + +bool is_xpr_device(const char* device_name) { + const char* output = strstr(device_name, "xpr"); + + if (output == nullptr) { + return false; + } else { + return true; + } +} +}; // namespace xcl diff --git a/regression/spectraflux/src/host_p2p/xcl2/xcl2.hpp b/regression/spectraflux/src/host_p2p/xcl2/xcl2.hpp new file mode 100644 index 0000000..4dd521c --- /dev/null +++ b/regression/spectraflux/src/host_p2p/xcl2/xcl2.hpp @@ -0,0 +1,118 @@ +/** +* Copyright (C) 2019-2021 Xilinx, Inc +* +* Licensed under the Apache License, Version 2.0 (the "License"). You may +* not use this file except in compliance with the License. A copy of the +* License is located at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +* License for the specific language governing permissions and limitations +* under the License. 
+*/ + +#pragma once + +#define CL_HPP_CL_1_2_DEFAULT_BUILD +#define CL_HPP_TARGET_OPENCL_VERSION 120 +#define CL_HPP_MINIMUM_OPENCL_VERSION 120 +#define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1 +#define CL_USE_DEPRECATED_OPENCL_1_2_APIS + +// OCL_CHECK doesn't work if call has templatized function call +#define OCL_CHECK(error, call) \ + call; \ + if (error != CL_SUCCESS) { \ + printf("%s:%d Error calling " #call ", error code is: %d\n", __FILE__, __LINE__, error); \ + exit(EXIT_FAILURE); \ + } + +#include +#include +#include +#include +// When creating a buffer with user pointer (CL_MEM_USE_HOST_PTR), under the +// hood +// User ptr is used if and only if it is properly aligned (page aligned). When +// not +// aligned, runtime has no choice but to create its own host side buffer that +// backs +// user ptr. This in turn implies that all operations that move data to and from +// device incur an extra memcpy to move data to/from runtime's own host buffer +// from/to user pointer. So it is recommended to use this allocator if user wish +// to +// Create Buffer/Memory Object with CL_MEM_USE_HOST_PTR to align user buffer to +// the +// page boundary. It will ensure that user buffer will be used when user create +// Buffer/Mem Object with CL_MEM_USE_HOST_PTR. 
+template +struct aligned_allocator { + using value_type = T; + + aligned_allocator() {} + + aligned_allocator(const aligned_allocator&) {} + + template + aligned_allocator(const aligned_allocator&) {} + + T* allocate(std::size_t num) { + void* ptr = nullptr; + +#if defined(_WINDOWS) + { + ptr = _aligned_malloc(num * sizeof(T), 4096); + if (ptr == nullptr) { + std::cout << "Failed to allocate memory" << std::endl; + exit(EXIT_FAILURE); + } + } +#else + { + if (posix_memalign(&ptr, 4096, num * sizeof(T))) throw std::bad_alloc(); + } +#endif + return reinterpret_cast(ptr); + } + void deallocate(T* p, std::size_t num) { +#if defined(_WINDOWS) + _aligned_free(p); +#else + free(p); +#endif + } +}; + +namespace xcl { +std::vector get_xil_devices(); +std::vector get_devices(const std::string& vendor_name); +cl::Device find_device_bdf(const std::vector& devices, const std::string& bdf); +cl_device_id find_device_bdf_c(cl_device_id* devices, const std::string& bdf, cl_uint dev_count); +std::string convert_size(size_t size); +std::vector read_binary_file(const std::string& xclbin_file_name); +bool is_emulation(); +bool is_hw_emulation(); +bool is_xpr_device(const char* device_name); +class P2P { + public: + static decltype(&xclGetMemObjectFd) getMemObjectFd; + static decltype(&xclGetMemObjectFromFd) getMemObjectFromFd; + static void init(const cl_platform_id& platform) { + void* bar = clGetExtensionFunctionAddressForPlatform(platform, "xclGetMemObjectFd"); + getMemObjectFd = (decltype(&xclGetMemObjectFd))bar; + bar = clGetExtensionFunctionAddressForPlatform(platform, "xclGetMemObjectFromFd"); + getMemObjectFromFd = (decltype(&xclGetMemObjectFromFd))bar; + } +}; +class Ext { + public: + static decltype(&xclGetComputeUnitInfo) getComputeUnitInfo; + static void init(const cl_platform_id& platform) { + void* bar = clGetExtensionFunctionAddressForPlatform(platform, "xclGetComputeUnitInfo"); + getComputeUnitInfo = (decltype(&xclGetComputeUnitInfo))bar; + } +}; +} diff --git 
a/regression/spectraflux/src/kernel/hac.cpp b/regression/spectraflux/src/kernel/hac.cpp new file mode 100644 index 0000000..5a8a6d7 --- /dev/null +++ b/regression/spectraflux/src/kernel/hac.cpp @@ -0,0 +1,737 @@ + +#include "hac.h" +#include +#include +#include +#include +#include + + +template +ap_uint popcount(ap_uint input) { + #pragma HLS INLINE OFF + ap_uint count = 0; + popcount_loop: for (int i = 0; i < N; i++) { + #pragma HLS UNROLL + count += input[i]; + } + return count; +} + +int index(int i, int j) { + #pragma HLS INLINE + if (i < j) + std::swap(i, j); + return i * (i + 1) / 2 + j; +} + + +void calculate_distance_matrix_kernel( +bitset_dhv encoded_spectra[MAX_NUM_SPECTRA], +int num_spectra, +distance_t max_cluster_distances[MAX_NUM_SPECTRA * (MAX_NUM_SPECTRA + 1) / 2], +distance_t original_distances[MAX_NUM_SPECTRA * (MAX_NUM_SPECTRA + 1) / 2], + +int *dist_lt_thres_cnt +) { + + int dist_lt_thres_cnt_tmp = 0; + + + calculate_distance: for (int i = 0; i < MAX_NUM_SPECTRA; i++) { + if (i < num_spectra){ + + inner_distance: for (int j = 0; j < i; j++) { + #pragma HLS loop_tripcount min=1 max=300 avg=150 + #pragma HLS PIPELINE II=1 + bitset_dhv xor_result = encoded_spectra[i] ^ encoded_spectra[j]; + distance_t popcount_result = popcount(xor_result); + auto idx = index(i, j); + max_cluster_distances[idx] = popcount_result; + original_distances[idx] = popcount_result; + + dist_lt_thres_cnt_tmp += (popcount_result <= DISTANCE_THRESHOLD); + } + } + } + + *dist_lt_thres_cnt = dist_lt_thres_cnt_tmp; + +} + + + +void update_max_cluster_distances( +distance_t max_cluster_distances[MAX_NUM_SPECTRA * (MAX_NUM_SPECTRA + 1) / 2], +int i, +int j, +int num_spectra, + +int *dist_lt_thres_cnt +) { + #pragma HLS INLINE + + update_loop: for (int k = 0; k < MAX_NUM_SPECTRA; k++) { + #pragma HLS DEPENDENCE variable=max_cluster_distances inter false + #pragma HLS PIPELINE + if (k != j && k !=i && k < num_spectra) { + auto idx_ik = index(i, k); + auto idx_jk = index(j, k); + + 
auto vi = max_cluster_distances[idx_ik]; + auto vj = max_cluster_distances[idx_jk]; + auto max_v = (vi > vj) ? vi : vj; + max_cluster_distances[idx_ik] = max_v; + + *dist_lt_thres_cnt -= (vj > DISTANCE_THRESHOLD && vi <= DISTANCE_THRESHOLD); + } + } +} + + +void agglomerative_ccl_kernel( +distance_t max_cluster_distances[MAX_NUM_SPECTRA * (MAX_NUM_SPECTRA + 1) / 2], +int num_spectra, +int valid_clusters[MAX_NUM_SPECTRA], +int* num_valid_clusters_ptr, + +Cluster thres_clusters[MAX_NUM_SPECTRA], +int valid_dist_cnt +){ + + + int valid_total_clusters[MAX_NUM_SPECTRA]; + + + int num_valid_clusters_thres = *num_valid_clusters_ptr; + + int stack[MAX_NUM_SPECTRA]; + int stack_ptr = 0; + + initialize_clusters: for (int i = 0; i < MAX_NUM_SPECTRA; i++) { + #pragma HLS UNROLL + valid_clusters[i] = i; + valid_total_clusters[i]=i; + thres_clusters[i].num_elements = 1; + thres_clusters[i].next = -1; + thres_clusters[i].end = i; + thres_clusters[i].check_cluster = 0; + + } + + main_while_loop: for (int num_valid_clusters = *num_valid_clusters_ptr; num_valid_clusters > 1 && valid_dist_cnt > 0; num_valid_clusters--) { + #pragma HLS loop_tripcount min=300 max=300 + std::pair closest_clusters; + #pragma HLS ARRAY_PARTITION variable=valid_total_clusters complete + bool merged = false; + + merged_while : while (merged == false) { + #pragma HLS loop_tripcount min=8 max=8 + + if (stack_ptr == 0) { + stack[stack_ptr++] = 0; + } + + distance_t min_distance = distance_t(0.95*Dhv); + int i = stack[stack_ptr - 1]; + int j = -1; + + min_row : for (int k = 0; k < num_valid_clusters; ++k) { + #pragma HLS loop_tripcount min=150 max=150 + #pragma HLS pipeline II=1 + #pragma HLS UNROLL FACTOR=2 + if (k != i && (max_cluster_distances[index(valid_total_clusters[i], valid_total_clusters[k])] <= min_distance)) { + min_distance = max_cluster_distances[index(valid_total_clusters[i], valid_total_clusters[k])]; + j = k; + + } + } + + + bool in_stack = false; + if (stack[stack_ptr - 2] == j) { // Found the 
local minimum + in_stack = true; + + } + + + if (in_stack ) { + + if (j +void calculate_consensus( +distance_t original_distances[MAX_NUM_SPECTRA * (MAX_NUM_SPECTRA + 1) / 2], +Cluster clusters[MAX_NUM_SPECTRA], +int valid_clusters[MAX_NUM_SPECTRA], +int consensus[MAX_NUM_SPECTRA], +int num_valid_clusters +) { + + using sum_distance_t = std::conditional_t; + + calculate_consensus_outer: for(int i = 0; i < num_valid_clusters; i++) { + #pragma HLS loop_tripcount min=30 max=30 + int cluster_idx = valid_clusters[i]; + int num_elements = clusters[cluster_idx].num_elements; + + int elements_tmp[MAX_NUM_SPECTRA]; + + #pragma HLS ARRAY_PARTITION variable=elements_tmp + int iter_element = cluster_idx; + for (int j = 0; j < num_elements; j++){ + #pragma HLS loop_tripcount min=7 max=7 + #pragma HLS pipeline + elements_tmp[j] = iter_element; + iter_element = clusters[iter_element].next; + } + + if(num_elements == 1) { + consensus[i] = elements_tmp[0]; + } else { + sum_distance_t min_sum_distance = distance_t(0.95 * Dhv) * (num_elements - 1); + + calculate_consensus_inner: for(int j = 0; j < num_elements; j++) { + #pragma HLS loop_tripcount min=7 max=7 + #pragma HLS pipeline + sum_distance_t sum_distance = 0; + calculate_sum_distance: for(int k = 0; k < num_elements; k++) { // * Only check k>j + #pragma HLS loop_tripcount min=7 max=7 + #pragma HLS pipeline + if(j != k) { + sum_distance += original_distances[index(elements_tmp[j], elements_tmp[k])]; + } + } + if (sum_distance < min_sum_distance) { + min_sum_distance = sum_distance; + consensus[i] = elements_tmp[j]; + } + } + } + } +} + +void hd_encoding( + bitset_dhv ID[f], + bitset_dhv Level[Q], + PeakBuffer &peak_buffer, + int peak_count, + bitset_dhv *encoded_spectra +) { + bitset_dhv xor_results[MAX_PEAKS]; + #pragma HLS BIND_STORAGE variable=ID type=RAM_1P impl=uram + + peak_cnt_t sum[Dhv]; + + #pragma HLS ARRAY_PARTITION variable=sum + + loop_init: for (int j=0; j half_peak_count_tmp) ? 
1 : 0; + } + + *encoded_spectra = majority; +} + + + +void encoding_work_func( + bitset_dhv ID_local[f], + bitset_dhv Level_local[Q], + ap_uint *peak_mz_buffer, + ap_uint *peak_intensity_buffer, + int *peak_count_buffer, + int *num_spectra_buffer, + // bitset_dhv *encoded_spectra_hbm, + hls::stream encoded_spectra_stream[N_CLUSTERING], + + int batch_size + +){ + #pragma HLS dataflow + + hls::stream local_peak_stream; + hls::stream peak_count; + hls::stream encoded_spectra; + hls::stream num_spectra_stream; + hls::stream num_spectra_stream_c1; + + #pragma HLS STREAM variable=local_peak_stream depth=16 type=fifo + #pragma HLS STREAM variable=peak_count depth=16 type=fifo + #pragma HLS STREAM variable=encoded_spectra depth=16 type=fifo + #pragma HLS STREAM variable=num_spectra_stream depth=16 type=fifo + #pragma HLS STREAM variable=num_spectra_stream_c1 depth=16 type=fifo + + + + loop_stream_in: for (int ib = 0; ib < batch_size*MAX_NUM_SPECTRA; ib++) { + #pragma HLS loop_tripcount min=300000 max=300000 + // #pragma HLS pipeline + PeakBuffer local_peak; + #pragma HLS aggregate variable=local_peak + for (int j = 0; j < PEAK_LOOP_CNT; j++) { + auto tmp_mz = peak_mz_buffer[ib * PEAK_LOOP_CNT + j]; + auto tmp_intensity = peak_intensity_buffer[ib * PEAK_LOOP_CNT + j]; + for (int k = 0; k < PEAK_PORT_W/32; k++){ + #pragma HLS UNROLL + local_peak.mz[j*32 + k] = tmp_mz.range(k*32+31, k*32); + local_peak.intensity[j*32 + k] = tmp_intensity.range(k*32+31, k*32); + } + if (j == PEAK_LOOP_CNT-1){ //last iter + local_peak_stream << local_peak; + } + } + } + + + + loop_stream_pc_in: for (int ib = 0; ib < batch_size*MAX_NUM_SPECTRA; ib++) { + #pragma HLS loop_tripcount min=300000 max=300000 + peak_count << peak_count_buffer[ib]; + } + + + loop_workload: for (int b = 0; b < batch_size; b++) { + #pragma HLS loop_tripcount min=1000 max=1000 + #pragma HLS pipeline + int num_spectra_tmp = num_spectra_stream.read(); + num_spectra_stream_c1 << num_spectra_tmp; + loop_workload_inner:for (int 
i = 0; i < MAX_NUM_SPECTRA; i++){ + #pragma HLS loop_flatten off + PeakBuffer local_peak; + #pragma HLS aggregate variable=local_peak + local_peak_stream >> local_peak; + + peak_cnt_t local_peak_count; + bitset_dhv local_encoded_spectra; + peak_count >> local_peak_count; + local_peak_count = (i < num_spectra_tmp) ? local_peak_count : peak_cnt_t(0); + hd_encoding(ID_local, Level_local, local_peak, local_peak_count, &local_encoded_spectra); + encoded_spectra << local_encoded_spectra; + } + } + + loop_stream_num_in: for(int b=0; b < batch_size; b++){ + #pragma HLS loop_tripcount min=1000 max=1000 + #pragma HLS pipeline + num_spectra_stream << num_spectra_buffer[b]; + } + + loop_stream_out: for (int b = 0; b < batch_size; b++) { + #pragma HLS loop_tripcount min=1000 max=1000 + bitset_dhv local_encoded_buffer; + + int n = b % N_CLUSTERING; + int num_spectra_tmp; + num_spectra_stream_c1 >> num_spectra_tmp; + encoded_spectra_stream[n] << bitset_dhv(num_spectra_tmp); + loop_stream_out_buffer:for (int s=0; s < MAX_NUM_SPECTRA; s++){ + #pragma HLS pipeline + encoded_spectra >> local_encoded_buffer; + if (s < num_spectra_tmp){ + encoded_spectra_stream[n] << local_encoded_buffer; + } + } + + } +} + + +void encoding_kernel( + bitset_dhv *ID_Level_buffer, + ap_uint *peak_mz_buffer, + ap_uint *peak_intensity_buffer, + int *peak_count_buffer, + int *num_spectra_buffer, + // bitset_dhv *encoded_spectra_hbm, + hls::stream encoded_spectra_stream[N_CLUSTERING], + int batch_size +) { + // // #pragma HLS INTERFACE m_axi port=encoded_spectra_hbm offset=slave bundle=gmem num_read_outstanding=1 max_read_burst_length=2 + // #pragma HLS INTERFACE m_axi port=ID_Level_buffer offset=slave bundle=gmem0 num_write_outstanding=1 max_write_burst_length=2 + // #pragma HLS INTERFACE m_axi port=peak_mz_buffer offset=slave bundle=gmem2 num_write_outstanding=1 max_write_burst_length=2 + // #pragma HLS INTERFACE m_axi port=peak_intensity_buffer offset=slave bundle=gmem3 num_write_outstanding=1 
max_write_burst_length=2 + // #pragma HLS INTERFACE m_axi port=peak_count_buffer offset=slave bundle=gmem4 num_write_outstanding=1 max_write_burst_length=2 + // #pragma HLS INTERFACE m_axi port=num_spectra_buffer offset=slave bundle=gmem5 num_write_outstanding=1 max_write_burst_length=2 + + bitset_dhv ID_local[f], Level_local[Q]; + #pragma HLS BIND_STORAGE variable=ID_local type=RAM_1P impl=uram + + + for(int j=0; j(original_distances, thres_clusters, local_valid_clusters, local_consensus, local_num_valid_clusters); + + *num_consensus = num_spectra; + *num_valid_clusters = local_num_valid_clusters; + + copy_cluster_elements: for (int i = 0; i < local_num_valid_clusters; i++) { + #pragma HLS loop_tripcount min=30 max=30 + #pragma HLS pipeline + #pragma HLS UNROLL FACTOR=2 skip_exit_check + + int cluster_idx = local_valid_clusters[i]; + num_elements[i] = thres_clusters[cluster_idx].num_elements; + valid_clusters[i] = local_valid_clusters[i]; + consensus[i] = local_consensus[i]; + + } + + + int element_copy_cnt = 0; + copy_cluster_elements_inner: for (int i = 0; i < local_num_valid_clusters; i++) { + #pragma HLS loop_tripcount min=30 max=30 + int cluster_idx = local_valid_clusters[i]; + int num_elements = thres_clusters[cluster_idx].num_elements; + int iter_element = cluster_idx; + for (int j = 0; j < num_elements; j++) { + #pragma HLS loop_tripcount min=4 max=20 + #pragma HLS pipeline + elements[i * CLUSTER_SIZE + j] = iter_element; + iter_element = thres_clusters[iter_element].next; + } + } + +} +*/ + +void clustering_kernel( +// bitset_dhv *encoded_spectra_buffer, +hls::stream &encoded_spectra_stream, +// int num_spectra, +int *num_valid_clusters, // 1 +int *elements, //[MAX_NUM_SPECTRA * CLUSTER_SIZE], +int *num_elements, // [CLUSTER_SIZE] +int *consensus, //[CLUSTER_SIZE], +int batch_size +) { + + // #pragma HLS INTERFACE m_axi port=num_valid_clusters offset=slave bundle=gmem_num num_read_outstanding=1 max_read_burst_length=2 + // #pragma HLS INTERFACE m_axi 
port=num_elements offset=slave bundle=gmem_num num_read_outstanding=1 max_read_burst_length=2 + // #pragma HLS INTERFACE m_axi port=consensus offset=slave bundle=gmem_num num_read_outstanding=1 max_read_burst_length=2 + // #pragma HLS INTERFACE m_axi port=elements offset=slave bundle=gmem_ele num_read_outstanding=1 max_read_burst_length=2 + + distance_t max_cluster_distances[MAX_NUM_SPECTRA * (MAX_NUM_SPECTRA + 1) / 2]; + distance_t original_distances[MAX_NUM_SPECTRA * (MAX_NUM_SPECTRA + 1) / 2]; + + #pragma HLS BIND_STORAGE variable=original_distances type=ram_2p impl=uram + #pragma HLS BIND_STORAGE variable=max_cluster_distances type=ram_2p impl=uram + + + int local_num_valid_clusters; + int local_consensus [MAX_NUM_SPECTRA]; + int local_valid_clusters [MAX_NUM_SPECTRA]; + bitset_dhv local_encoded_spectra [MAX_NUM_SPECTRA]; + + Cluster thres_clusters[MAX_NUM_SPECTRA]; + + for (int ib=0; ib(original_distances, thres_clusters, local_valid_clusters, local_consensus, local_num_valid_clusters); + + + int offset = batch_size*CLUSTER_SIZE; + num_valid_clusters[offset] = local_num_valid_clusters; + copy_cluster_elements: for (int i = 0; i < local_num_valid_clusters; i++) { + #pragma HLS loop_tripcount min=30 max=30 + #pragma HLS pipeline + #pragma HLS UNROLL FACTOR=2 skip_exit_check + + int cluster_idx = local_valid_clusters[i]; + // valid_clusters[offset + i] = cluster_idx; + num_elements[offset + i] = thres_clusters[cluster_idx].num_elements; + consensus[offset + i] = local_consensus[i]; + + } + + int element_copy_cnt = offset; + copy_cluster_elements_inner: for (int i = 0; i < local_num_valid_clusters; i++) { + #pragma HLS loop_tripcount min=30 max=30 + int cluster_idx = local_valid_clusters[i]; + int num_elements = thres_clusters[cluster_idx].num_elements; + int iter_element = cluster_idx; + for (int j = 0; j < num_elements; j++) { + #pragma HLS loop_tripcount min=4 max=20 + #pragma HLS pipeline + elements[element_copy_cnt++] = iter_element; + iter_element = 
thres_clusters[iter_element].next; + } + } + } +} + +void testout(hls::stream &encoded_spectra_stream) { + #pragma HLS INLINE off + int num_sp = encoded_spectra_stream.read(); + for (int j = 0; j < num_sp; ++j) { + #pragma HLS loop_tripcount min=300 max=300 + encoded_spectra_stream.read(); + } +} + +extern "C" void top_wrapper( + bitset_dhv *ID_Level_buffer, + ap_uint *peak_mz_buffer, + ap_uint *peak_intensity_buffer, + int *peak_count_buffer, + int *num_spectra_buffer, + // bitset_dhv *encoded_spectra_hbm, + int batch_size, + + + int *c0_num_valid_clusters, // 1 + int *c0_elements, //[MAX_NUM_SPECTRA * CLUSTER_SIZE], + int *c0_num_elements, // [CLUSTER_SIZE] + int *c0_consensus, //[CLUSTER_SIZE], + + int *c1_num_valid_clusters, // 1 + int *c1_elements, //[MAX_NUM_SPECTRA * CLUSTER_SIZE], + int *c1_num_elements, // [CLUSTER_SIZE] + int *c1_consensus, //[CLUSTER_SIZE], + + int *c2_num_valid_clusters, // 1 + int *c2_elements, //[MAX_NUM_SPECTRA * CLUSTER_SIZE], + int *c2_num_elements, // [CLUSTER_SIZE] + int *c2_consensus, //[CLUSTER_SIZE], + + int *c3_num_valid_clusters, // 1 + int *c3_elements, //[MAX_NUM_SPECTRA * CLUSTER_SIZE], + int *c3_num_elements, // [CLUSTER_SIZE] + int *c3_consensus //[CLUSTER_SIZE], +) { + + #pragma HLS INTERFACE m_axi port=ID_Level_buffer offset=slave bundle=gmem0 num_write_outstanding=1 max_write_burst_length=2 + #pragma HLS INTERFACE m_axi port=peak_mz_buffer offset=slave bundle=gmem2 num_write_outstanding=1 max_write_burst_length=2 + #pragma HLS INTERFACE m_axi port=peak_intensity_buffer offset=slave bundle=gmem3 num_write_outstanding=1 max_write_burst_length=2 + #pragma HLS INTERFACE m_axi port=peak_count_buffer offset=slave bundle=gmem0 num_write_outstanding=1 max_write_burst_length=2 + #pragma HLS INTERFACE m_axi port=num_spectra_buffer offset=slave bundle=gmem5 num_write_outstanding=1 max_write_burst_length=2 + + + #pragma HLS INTERFACE m_axi port=c0_num_valid_clusters offset=slave bundle=gmem0_num num_read_outstanding=1 
max_read_burst_length=2 + #pragma HLS INTERFACE m_axi port=c0_num_elements offset=slave bundle=gmem0_num num_read_outstanding=1 max_read_burst_length=2 + #pragma HLS INTERFACE m_axi port=c0_consensus offset=slave bundle=gmem0_num num_read_outstanding=1 max_read_burst_length=2 + #pragma HLS INTERFACE m_axi port=c0_elements offset=slave bundle=gmem0_ele num_read_outstanding=1 max_read_burst_length=2 + + #pragma HLS INTERFACE m_axi port=c1_num_valid_clusters offset=slave bundle=gmem1_num num_read_outstanding=1 max_read_burst_length=2 + #pragma HLS INTERFACE m_axi port=c1_num_elements offset=slave bundle=gmem1_num num_read_outstanding=1 max_read_burst_length=2 + #pragma HLS INTERFACE m_axi port=c1_consensus offset=slave bundle=gmem1_num num_read_outstanding=1 max_read_burst_length=2 + #pragma HLS INTERFACE m_axi port=c1_elements offset=slave bundle=gmem1_ele num_read_outstanding=1 max_read_burst_length=2 + + #pragma HLS INTERFACE m_axi port=c2_num_valid_clusters offset=slave bundle=gmem2_num num_read_outstanding=1 max_read_burst_length=2 + #pragma HLS INTERFACE m_axi port=c2_num_elements offset=slave bundle=gmem2_num num_read_outstanding=1 max_read_burst_length=2 + #pragma HLS INTERFACE m_axi port=c2_consensus offset=slave bundle=gmem2_num num_read_outstanding=1 max_read_burst_length=2 + #pragma HLS INTERFACE m_axi port=c2_elements offset=slave bundle=gmem2_ele num_read_outstanding=1 max_read_burst_length=2 + + #pragma HLS INTERFACE m_axi port=c3_num_valid_clusters offset=slave bundle=gmem3_num num_read_outstanding=1 max_read_burst_length=2 + #pragma HLS INTERFACE m_axi port=c3_num_elements offset=slave bundle=gmem3_num num_read_outstanding=1 max_read_burst_length=2 + #pragma HLS INTERFACE m_axi port=c3_consensus offset=slave bundle=gmem3_num num_read_outstanding=1 max_read_burst_length=2 + #pragma HLS INTERFACE m_axi port=c3_elements offset=slave bundle=gmem3_ele num_read_outstanding=1 max_read_burst_length=2 + + // #pragma HLS interface mode=ap_ctrl_none port=return 
+ + hls::stream encoded_spectra_stream[N_CLUSTERING]; + #pragma HLS STREAM variable=encoded_spectra_stream depth=333 type=fifo + + + #pragma HLS DATAFLOW + + int batch_size0 = (batch_size+3)/N_CLUSTERING; + int batch_size1 = (batch_size+2)/N_CLUSTERING; + int batch_size2 = (batch_size+1)/N_CLUSTERING; + int batch_size3 = (batch_size+0)/N_CLUSTERING; + encoding_kernel(ID_Level_buffer, peak_mz_buffer, peak_intensity_buffer, + peak_count_buffer, num_spectra_buffer, encoded_spectra_stream, + batch_size); + + clustering_kernel(encoded_spectra_stream[0],c0_num_valid_clusters, c0_elements, c0_num_elements, c0_consensus, batch_size0); + clustering_kernel(encoded_spectra_stream[1],c1_num_valid_clusters, c1_elements, c1_num_elements, c1_consensus, batch_size1); + clustering_kernel(encoded_spectra_stream[2],c2_num_valid_clusters, c2_elements, c2_num_elements, c2_consensus, batch_size2); + clustering_kernel(encoded_spectra_stream[3],c3_num_valid_clusters, c3_elements, c3_num_elements, c3_consensus, batch_size3); + + + +} + diff --git a/regression/spectraflux/src/kernel/hac.h b/regression/spectraflux/src/kernel/hac.h new file mode 100644 index 0000000..3cb791a --- /dev/null +++ b/regression/spectraflux/src/kernel/hac.h @@ -0,0 +1,82 @@ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "hls_task.h" + + +#define MAX_NUM_SPECTRA 300 +#define Dhv 2048 +#define f 4400 +#define Q 16 +#define MAX_PEAKS 64 +#define DISTANCE_THRESHOLD (0.25 * Dhv) +#define CLUSTER_SIZE 300 + +#define MAX_BATCH_SIZE 1024 + +#define N_CLUSTERING 4 + +#define PEAK_PORT_W 1024 +#define PEAK_LOOP_CNT ( (MAX_PEAKS+(PEAK_PORT_W/32)-1) / (PEAK_PORT_W/32) ) + +#define LOG_FUNCTION(X) (\ + X <= 15 ? 4 : \ + X <= 31 ? 5 : \ + X <= 63 ? 6 : \ + X <= 127 ? 7 : \ + X <= 255 ? 8 : \ + X <= 511 ? 9 : \ + X <= 1023 ? 10 : \ + X <= 2047 ? 11 : \ + X <= 4095 ? 12 : \ + X <= 8191 ? 13 : \ + X <= 16383 ? 14 : \ + X <= 65535 ? 
16 : 32 \ + ) + + +#define LOG_MAX_PEAKS ( LOG_FUNCTION(MAX_PEAKS) ) +#define LOG_Dhv ( LOG_FUNCTION(Dhv) ) +#define LOG_MAX_NUM_SPECTRA ( LOG_FUNCTION(MAX_NUM_SPECTRA) ) + + + + +typedef ap_uint bitset_dhv; +typedef ap_uint peak_cnt_t; + +typedef ap_uint distance_t; +typedef ap_uint acc_distance_t; +typedef ap_uint acc_thres_distance_t; + + +struct Cluster { + int next; + int end; + int num_elements; + // float max_distance; + int check_cluster; +}; + + +struct PeakBuffer +{ + int mz[MAX_PEAKS]; + int intensity[MAX_PEAKS]; +}; + +// struct EncodedBuffer { +// int num_spectra; +// bitset_dhv encoded_spectra[MAX_BATCH_SIZE]; +// }; +typedef bitset_dhv EncodedBuffer; + diff --git a/regression/spectraflux/src/kernel_tap/hac.cpp b/regression/spectraflux/src/kernel_tap/hac.cpp new file mode 100644 index 0000000..b9575f3 --- /dev/null +++ b/regression/spectraflux/src/kernel_tap/hac.cpp @@ -0,0 +1,702 @@ +#include "hac.h" + + +template +ap_uint popcount(ap_uint input) { + #pragma HLS INLINE OFF + ap_uint count = 0; + popcount_loop: for (int i = 0; i < N; i++) { + #pragma HLS UNROLL + count += input[i]; + } + return count; +} + +int index(int i, int j) { + #pragma HLS INLINE + if (i < j) + std::swap(i, j); + return i * (i + 1) / 2 + j; +} + + +void calculate_distance_matrix_kernel( +bitset_dhv encoded_spectra[MAX_NUM_SPECTRA], +int num_spectra, +distance_t max_cluster_distances[MAX_NUM_SPECTRA * (MAX_NUM_SPECTRA + 1) / 2], +distance_t original_distances[MAX_NUM_SPECTRA * (MAX_NUM_SPECTRA + 1) / 2], +int *dist_lt_thres_cnt +) { + int dist_lt_thres_cnt_tmp = 0; + + calculate_distance: for (int i = 0; i < MAX_NUM_SPECTRA; i++) { + if (i < num_spectra){ + + inner_distance: for (int j = 0; j < i; j++) { + #pragma HLS loop_tripcount min=1 max=300 avg=150 + #pragma HLS PIPELINE II=1 + bitset_dhv xor_result = encoded_spectra[i] ^ encoded_spectra[j]; + distance_t popcount_result = popcount(xor_result); + auto idx = index(i, j); + // ap_fixed<16, 1> normalized_distance = 
static_cast(popcount_result) / Dhv; + max_cluster_distances[idx] = popcount_result; + original_distances[idx] = popcount_result; + + dist_lt_thres_cnt_tmp += (popcount_result <= DISTANCE_THRESHOLD); + } + } + } + + *dist_lt_thres_cnt = dist_lt_thres_cnt_tmp; + +} + + + +void update_max_cluster_distances( +distance_t max_cluster_distances[MAX_NUM_SPECTRA * (MAX_NUM_SPECTRA + 1) / 2], +int i, +int j, +int num_spectra, +int *dist_lt_thres_cnt +) { + #pragma HLS INLINE + update_loop: for (int k = 0; k < MAX_NUM_SPECTRA; k++) { + #pragma HLS DEPENDENCE variable=max_cluster_distances inter false + #pragma HLS PIPELINE + if (k != j && k !=i && k < num_spectra) { + auto idx_ik = index(i, k); + auto idx_jk = index(j, k); + + auto vi = max_cluster_distances[idx_ik]; + auto vj = max_cluster_distances[idx_jk]; + auto max_v = (vi > vj) ? vi : vj; + max_cluster_distances[idx_ik] = max_v; + + *dist_lt_thres_cnt -= (vj > DISTANCE_THRESHOLD && vi <= DISTANCE_THRESHOLD); + } + } +} + + +void agglomerative_ccl_kernel( +distance_t max_cluster_distances[MAX_NUM_SPECTRA * (MAX_NUM_SPECTRA + 1) / 2], +int num_spectra, +int valid_clusters[MAX_NUM_SPECTRA], +int* num_valid_clusters_ptr, + +Cluster thres_clusters[MAX_NUM_SPECTRA], +int valid_dist_cnt +){ + int valid_total_clusters[MAX_NUM_SPECTRA]; + int num_valid_clusters_thres = *num_valid_clusters_ptr; + + int stack[MAX_NUM_SPECTRA]; + int stack_ptr = 0; + + initialize_clusters: for (int i = 0; i < MAX_NUM_SPECTRA; i++) { + #pragma HLS UNROLL + valid_clusters[i] = i; + valid_total_clusters[i]=i; + + thres_clusters[i].num_elements = 1; + thres_clusters[i].next = -1; + thres_clusters[i].end = i; + thres_clusters[i].check_cluster = 0; + + } + + + main_while_loop: for (int num_valid_clusters = *num_valid_clusters_ptr; num_valid_clusters > 1 && valid_dist_cnt > 0; num_valid_clusters--) { + #pragma HLS loop_tripcount min=300 max=300 + std::pair closest_clusters; + #pragma HLS ARRAY_PARTITION variable=valid_total_clusters complete + bool merged 
= false; + + merged_while : while (merged == false) { + #pragma HLS loop_tripcount min=8 max=8 + + if (stack_ptr == 0) { + stack[stack_ptr++] = 0; + } + + distance_t min_distance = distance_t(0.95*Dhv); + int i = stack[stack_ptr - 1]; + int j = -1; + + min_row : for (int k = 0; k < num_valid_clusters; ++k) { + #pragma HLS loop_tripcount min=150 max=150 + #pragma HLS pipeline II=1 + #pragma HLS UNROLL FACTOR=2 + if (k != i && (max_cluster_distances[index(valid_total_clusters[i], valid_total_clusters[k])] <= min_distance)) { + min_distance = max_cluster_distances[index(valid_total_clusters[i], valid_total_clusters[k])]; + j = k; + + } + } + + + bool in_stack = false; + if (stack[stack_ptr - 2] == j) { // Found the local minimum + in_stack = true; + + } + + + if (in_stack ) { + + if (j +void calculate_consensus( +distance_t original_distances[MAX_NUM_SPECTRA * (MAX_NUM_SPECTRA + 1) / 2], +Cluster clusters[MAX_NUM_SPECTRA], +int valid_clusters[MAX_NUM_SPECTRA], +int consensus[MAX_NUM_SPECTRA], +int num_valid_clusters +) { + + using sum_distance_t = std::conditional_t; + + calculate_consensus_outer: for(int i = 0; i < num_valid_clusters; i++) { + #pragma HLS loop_tripcount min=30 max=30 + int cluster_idx = valid_clusters[i]; + int num_elements = clusters[cluster_idx].num_elements; + + int elements_tmp[MAX_NUM_SPECTRA]; + + #pragma HLS ARRAY_PARTITION variable=elements_tmp + int iter_element = cluster_idx; + for (int j = 0; j < num_elements; j++){ + #pragma HLS loop_tripcount min=7 max=7 + #pragma HLS pipeline + elements_tmp[j] = iter_element; + iter_element = clusters[iter_element].next; + } + + if(num_elements == 1) { + consensus[i] = elements_tmp[0]; + } else { + sum_distance_t min_sum_distance = distance_t(0.95 * Dhv) * (num_elements - 1); + + calculate_consensus_inner: for(int j = 0; j < num_elements; j++) { + #pragma HLS loop_tripcount min=7 max=7 + #pragma HLS pipeline + sum_distance_t sum_distance = 0; + calculate_sum_distance: for(int k = 0; k < num_elements; 
k++) { // * Only check k>j + #pragma HLS loop_tripcount min=7 max=7 + #pragma HLS pipeline + if(j != k) { + sum_distance += original_distances[index(elements_tmp[j], elements_tmp[k])]; + } + } + if (sum_distance < min_sum_distance) { + min_sum_distance = sum_distance; + consensus[i] = elements_tmp[j]; + } + } + } + } +} + + + +void hd_encoding( + bitset_dhv ID[f], + bitset_dhv Level[Q], + PeakBuffer &peak_buffer_mz, + PeakBuffer &peak_buffer_intensity, + int peak_count, + bitset_dhv *encoded_spectra +) { + bitset_dhv xor_results[MAX_PEAKS]; + #pragma HLS BIND_STORAGE variable=ID type=RAM_1P impl=uram + + peak_cnt_t sum[Dhv]; + + #pragma HLS ARRAY_PARTITION variable=sum + + + loop_init: for (int j=0; j half_peak_count_tmp) ? 1 : 0; + } + + *encoded_spectra = majority; +} + + +void hd_loop_stream_in( + tapa::mmap> peak_mz_buffer, // int *peak_mz_buffer, + tapa::mmap> peak_intensity_buffer, // int *peak_intensity_buffer, + tapa::mmap peak_count_buffer, // int *peak_count_buffer, + tapa::ostream &local_peak_mz_stream, + tapa::ostream &local_peak_intensity_stream, + tapa::ostream &peak_count, + int batch_size +) { + loop_stream_in: for (int ib = 0; ib < batch_size*MAX_NUM_SPECTRA; ib++) { + #pragma HLS loop_tripcount min=300000 max=300000 + #pragma HLS pipeline + peak_count << peak_count_buffer[ib]; + PeakBuffer local_peak_mz; + PeakBuffer local_peak_intensity; + #pragma HLS aggregate variable=local_peak_mz + #pragma HLS aggregate variable=local_peak_intensity + for (int j = 0; j < PEAK_LOOP_CNT; j++) { + auto tmp_mz = peak_mz_buffer[ib * PEAK_LOOP_CNT + j]; + auto tmp_intensity = peak_intensity_buffer[ib * PEAK_LOOP_CNT + j]; + for (int k = 0; k < PEAK_PORT_W/32; k++){ + #pragma HLS UNROLL + local_peak_mz.data[j*32 + k] = tmp_mz.range(k*32+31, k*32); + local_peak_intensity.data[j*32 + k] = tmp_intensity.range(k*32+31, k*32); + } + if (j == PEAK_LOOP_CNT-1){ //last iter + local_peak_mz_stream << local_peak_mz; + local_peak_intensity_stream << local_peak_intensity; + } + 
} + } +} + +void hd_loop_workload( + // bitset_dhv ID_local[f], + // bitset_dhv Level_local[Q], + tapa::mmap> ID_Level_buffer, // bitset_dhv *ID_Level_buffer, + tapa::istream &local_peak_mz_stream, + tapa::istream &local_peak_intensity_stream, + tapa::istream &num_spectra_stream, + tapa::ostream &num_spectra_stream_c1, + tapa::istream &peak_count, + tapa::ostream &encoded_spectra, + int batch_size +){ + bitset_dhv ID_local[f], Level_local[Q]; + #pragma HLS BIND_STORAGE variable=ID_local type=RAM_1P impl=uram + + //! Return when batch size==0 + if (batch_size == 0) return; + + for(int j=0; j> local_peak_mz; + local_peak_intensity_stream >> local_peak_intensity; + + peak_cnt_t local_peak_count; + bitset_dhv local_encoded_spectra; + peak_count >> local_peak_count; + local_peak_count = (i < num_spectra_tmp) ? local_peak_count : peak_cnt_t(0); // entries with i >= num_spectra_tmp are still encoded, but with a zero peak count + hd_encoding(ID_local, Level_local, local_peak_mz, local_peak_intensity, local_peak_count, &local_encoded_spectra); + encoded_spectra << local_encoded_spectra; + } + } + +} + +void hd_loop_stream_num_in ( // forwards num_spectra_buffer[0 .. batch_size-1] into num_spectra_stream, one value per batch + tapa::mmap num_spectra_buffer, + tapa::ostream &num_spectra_stream, + int batch_size +){ + loop_stream_num_in: for(int b=0; b < batch_size; b++){ + #pragma HLS loop_tripcount min=1000 max=1000 + #pragma HLS pipeline + num_spectra_stream << num_spectra_buffer[b]; + } +} + +void hd_loop_stream_out( // routes each batch of encoded spectra to one of the N_CLUSTERING output streams + tapa::istream &encoded_spectra, + tapa::istream &num_spectra_stream_c1, + tapa::ostreams &encoded_spectra_streams, // bitset_dhv *encoded_spectra_hbm, + int batch_size +){ +loop_stream_out:for (int b = 0; b < batch_size; b++) { + #pragma HLS loop_tripcount min = 1000 max = 1000 + + int n = b % N_CLUSTERING; // batches are dealt round-robin across the output streams + int num_spectra_tmp; + num_spectra_stream_c1 >> num_spectra_tmp; + encoded_spectra_streams[n].write(bitset_dhv(num_spectra_tmp)); // the batch's spectra count is written first, ahead of its encoded spectra + loop_stream_out_12:for (int s = 0; s < MAX_NUM_SPECTRA; s++) { + #pragma HLS pipeline + bitset_dhv local_encoded_buffer; + encoded_spectra >> local_encoded_buffer; // always drains MAX_NUM_SPECTRA entries per batch; only the first num_spectra_tmp are forwarded + if (s < num_spectra_tmp) { +
encoded_spectra_streams[n].write(local_encoded_buffer); + } + } + } +} + +/* +extern "C" void encoding_kernel( + // bitset_dhv *ID_buffer, + // bitset_dhv *Level_buffer, + tapa::mmap> ID_Level_buffer, // bitset_dhv *ID_Level_buffer, + tapa::mmap peak_mz_buffer, // int *peak_mz_buffer, + tapa::mmap peak_intensity_buffer, // int *peak_intensity_buffer, + tapa::mmap peak_count_buffer, // int *peak_count_buffer, + int num_spectra, // int num_spectra, + tapa::mmap> encoded_spectra_hbm, // bitset_dhv *encoded_spectra_hbm, + int batch_size // int batch_size +) { +// void encoding_work_func( +// bitset_dhv ID_local[f], +// bitset_dhv Level_local[Q], +// tapa::mmap peak_mz_buffer, // int *peak_mz_buffer, +// tapa::mmap peak_intensity_buffer, // int *peak_intensity_buffer, +// tapa::mmap peak_count_buffer, // int *peak_count_buffer, +// tapa::mmap encoded_spectra_hbm, // bitset_dhv *encoded_spectra_hbm, + +// int batch_size +// ){ + // #pragma HLS dataflow + + tapa::stream local_peak_stream; + tapa::stream peak_count; + tapa::stream encoded_spectra; + + // #pragma HLS STREAM variable=peak_count depth=16 type=fifo + // #pragma HLS STREAM variable=encoded_spectra depth=16 type=fifo + + + // loop_stream_in: for (int ib = 0; ib < batch_size*MAX_NUM_SPECTRA; ib++) { + // #pragma HLS loop_tripcount min=300000 max=300000 + // #pragma HLS pipeline + // peak_count << peak_count_buffer[ib]; + // PeakBuffer local_peak; + // #pragma HLS aggregate variable=local_peak + // for (int j = 0; j < MAX_PEAKS; j++) { + // local_peak.mz[j] = peak_mz_buffer[ib * MAX_PEAKS + j]; + // local_peak.intensity[j] = peak_intensity_buffer[ib * MAX_PEAKS + j]; + // } + // local_peak_stream << local_peak; + // } + + // loop_workload: for (int ib = 0; ib < batch_size*MAX_NUM_SPECTRA; ib++) { + // #pragma HLS loop_tripcount min=300000 max=300000 + // #pragma HLS pipeline + // PeakBuffer local_peak; + // #pragma HLS aggregate variable=local_peak + // local_peak_stream >> local_peak; + + // peak_cnt_t 
local_peak_count; + // bitset_dhv local_encoded_spectra; + // peak_count >> local_peak_count; + // hd_encoding(ID_local, Level_local, local_peak, local_peak_count, &local_encoded_spectra); + // encoded_spectra << local_encoded_spectra; + // } + + // loop_stream_out: for (int ib = 0; ib < batch_size*MAX_NUM_SPECTRA; ib++) { + // #pragma HLS loop_tripcount min=300000 max=300000 + // #pragma HLS pipeline + // bitset_dhv local_encoded_spectra; + // encoded_spectra >> local_encoded_spectra; + // encoded_spectra_hbm[ib] = local_encoded_spectra; + // } + + tapa::task() + .invoke(hd_loop_stream_in, peak_mz_buffer, peak_intensity_buffer, + peak_count_buffer, local_peak_stream, peak_count, batch_size) + .invoke(hd_loop_workload, ID_Level_buffer, local_peak_stream, + peak_count, encoded_spectra, batch_size) + .invoke(hd_loop_stream_out, encoded_spectra_hbm, encoded_spectra, batch_size); +} + + +extern "C" void encoding_kernel( + // bitset_dhv *ID_buffer, + // bitset_dhv *Level_buffer, + tapa::mmap ID_Level_buffer, // bitset_dhv *ID_Level_buffer, + tapa::mmap peak_mz_buffer, // int *peak_mz_buffer, + tapa::mmap peak_intensity_buffer, // int *peak_intensity_buffer, + tapa::mmap peak_count_buffer, // int *peak_count_buffer, + int num_spectra, // int num_spectra, + tapa::mmap encoded_spectra_hbm, // bitset_dhv *encoded_spectra_hbm, + int batch_size // int batch_size +) { + // #pragma HLS INTERFACE m_axi port=encoded_spectra_hbm offset=slave bundle=gmem + // #pragma HLS INTERFACE m_axi port=encoded_spectra_hbm offset=slave bundle=gmem num_read_outstanding=1 max_read_burst_length=2 + // #pragma HLS INTERFACE m_axi port=ID_Level_buffer offset=slave bundle=gmem0 num_write_outstanding=1 max_write_burst_length=2 + // #pragma HLS INTERFACE m_axi port=peak_mz_buffer offset=slave bundle=gmem2 num_write_outstanding=1 max_write_burst_length=2 + // #pragma HLS INTERFACE m_axi port=peak_intensity_buffer offset=slave bundle=gmem3 num_write_outstanding=1 max_write_burst_length=2 + // #pragma 
HLS INTERFACE m_axi port=peak_count_buffer offset=slave bundle=gmem4 num_write_outstanding=1 max_write_burst_length=2 + + bitset_dhv ID_local[f], Level_local[Q]; + #pragma HLS BIND_STORAGE variable=ID_local type=RAM_1P impl=uram + + + for(int j=0; j> encoded_spectra_buffer,// bitset_dhv *encoded_spectra_buffer, +tapa::istream &encoded_spectra_stream, +tapa::mmap num_valid_clusters, // int *num_valid_clusters, +tapa::mmap elements, // int elements[MAX_NUM_SPECTRA * CLUSTER_SIZE], +tapa::mmap consensus, // int consensus[MAX_NUM_SPECTRA], +tapa::mmap num_elements, // int *num_elements, +int batch_size_a, +int pid +) { + + // #pragma HLS INTERFACE m_axi port=valid_clusters offset=slave bundle=gmem10 + // #pragma HLS INTERFACE m_axi port=elements offset=slave bundle=gmem11 num_read_outstanding=1 max_read_burst_length=2 + // #pragma HLS INTERFACE m_axi port=consensus offset=slave bundle=gmem12 num_read_outstanding=1 max_read_burst_length=2 + + // static hls::stream encoded_spectra_stream; + + // #pragma HLS STREAM variable=encoded_spectra_stream depth=310 + + distance_t max_cluster_distances[MAX_NUM_SPECTRA * (MAX_NUM_SPECTRA + 1) / 2]; + distance_t original_distances [MAX_NUM_SPECTRA * (MAX_NUM_SPECTRA + 1) / 2]; + + #pragma HLS BIND_STORAGE variable=original_distances type=ram_2p impl=uram + #pragma HLS BIND_STORAGE variable=max_cluster_distances type=ram_2p impl=uram + + + int local_num_valid_clusters; + int local_consensus [MAX_NUM_SPECTRA]; + int local_valid_clusters [MAX_NUM_SPECTRA]; + bitset_dhv local_encoded_spectra [MAX_NUM_SPECTRA]; + + Cluster thres_clusters[MAX_NUM_SPECTRA]; + + int batch_size = (batch_size_a+N_CLUSTERING-pid)/N_CLUSTERING; + + for (int ib=0; ib(original_distances, thres_clusters, local_valid_clusters, local_consensus, local_num_valid_clusters); + + + int offset = batch_size*CLUSTER_SIZE; + num_valid_clusters[offset] = local_num_valid_clusters; + copy_cluster_elements: for (int i = 0; i < local_num_valid_clusters; i++) { + #pragma HLS 
loop_tripcount min=30 max=30 + #pragma HLS pipeline + #pragma HLS UNROLL FACTOR=2 skip_exit_check + + int cluster_idx = local_valid_clusters[i]; + // valid_clusters[offset + i] = cluster_idx; + num_elements[offset + i] = thres_clusters[cluster_idx].num_elements; // NOTE(review): offset is defined above as batch_size*CLUSTER_SIZE, which does not vary with ib, so each batch appears to write the same slots - confirm ib*CLUSTER_SIZE was not intended + consensus[offset + i] = local_consensus[i]; + + } + + int element_copy_cnt = offset; + copy_cluster_elements_inner: for (int i = 0; i < local_num_valid_clusters; i++) { + #pragma HLS loop_tripcount min=30 max=30 + int cluster_idx = local_valid_clusters[i]; + int num_elements = thres_clusters[cluster_idx].num_elements; // local member count; shadows the num_elements output buffer inside this loop + int iter_element = cluster_idx; + for (int j = 0; j < num_elements; j++) { + #pragma HLS loop_tripcount min=4 max=20 + #pragma HLS pipeline + elements[element_copy_cnt++] = iter_element; // member ids of successive clusters are packed contiguously starting at offset + iter_element = thres_clusters[iter_element].next; // follow the cluster's linked list to its next member + } + } + } + +} + +extern "C" void wrapper( + tapa::mmap> ID_Level_buffer, // bitset_dhv *ID_Level_buffer, + tapa::mmap> peak_mz_buffer, // int *peak_mz_buffer, + tapa::mmap> peak_intensity_buffer, // int *peak_intensity_buffer, + tapa::mmap peak_count_buffer, // int *peak_count_buffer, + tapa::mmap num_spectra_buffer, // int *num_spectra_buffer, + // tapa::mmap> encoded_spectra_hbm, // bitset_dhv *encoded_spectra_hbm, + int batch_size, // int batch_size + + tapa::mmap c0_num_valid_clusters, // 1 + tapa::mmap c0_elements, //[MAX_NUM_SPECTRA * CLUSTER_SIZE], + tapa::mmap c0_num_elements, // [CLUSTER_SIZE] + tapa::mmap c0_consensus, //[CLUSTER_SIZE], + + tapa::mmap c1_num_valid_clusters, // 1 + tapa::mmap c1_elements, //[MAX_NUM_SPECTRA * CLUSTER_SIZE], + tapa::mmap c1_num_elements, // [CLUSTER_SIZE] + tapa::mmap c1_consensus, //[CLUSTER_SIZE], + + tapa::mmapc2_num_valid_clusters, // 1 + tapa::mmap c2_elements, //[MAX_NUM_SPECTRA * CLUSTER_SIZE], + tapa::mmap c2_num_elements, // [CLUSTER_SIZE] + tapa::mmap c2_consensus, //[CLUSTER_SIZE], + + tapa::mmap c3_num_valid_clusters, // 1 + tapa::mmap c3_elements, //[MAX_NUM_SPECTRA * CLUSTER_SIZE], + tapa::mmap
c3_num_elements, // [CLUSTER_SIZE] + tapa::mmap c3_consensus //[CLUSTER_SIZE], +){ + tapa::stream local_peak_mz_stream; + tapa::stream local_peak_intensity_stream; + tapa::stream peak_count; + tapa::stream encoded_spectra; + tapa::stream num_spectra_stream; + tapa::stream num_spectra_stream_c1; + + tapa::streams encoded_spectra_stream; + + tapa::task() // dataflow: stream_in + stream_num_in -> workload -> stream_out -> one clustering_kernel per encoded_spectra_stream[n] + .invoke(hd_loop_stream_in, peak_mz_buffer, peak_intensity_buffer, + peak_count_buffer, local_peak_mz_stream, local_peak_intensity_stream, peak_count, batch_size) + .invoke(hd_loop_stream_num_in, num_spectra_buffer, num_spectra_stream, batch_size) + .invoke(hd_loop_workload, ID_Level_buffer, local_peak_mz_stream, local_peak_intensity_stream, num_spectra_stream, num_spectra_stream_c1, + peak_count, encoded_spectra, batch_size) + .invoke(hd_loop_stream_out,encoded_spectra, num_spectra_stream_c1, + encoded_spectra_stream, batch_size) + .invoke(clustering_kernel,encoded_spectra_stream[0],c0_num_valid_clusters, c0_elements, c0_consensus, c0_num_elements, batch_size, 1) // argument order follows clustering_kernel's parameters: elements, consensus, num_elements; pid is 1-based + .invoke(clustering_kernel,encoded_spectra_stream[1],c1_num_valid_clusters, c1_elements, c1_consensus, c1_num_elements, batch_size, 2) + .invoke(clustering_kernel,encoded_spectra_stream[2],c2_num_valid_clusters, c2_elements, c2_consensus, c2_num_elements, batch_size, 3) + .invoke(clustering_kernel,encoded_spectra_stream[3],c3_num_valid_clusters, c3_elements, c3_consensus, c3_num_elements, batch_size, 4); +} + diff --git a/regression/spectraflux/src/kernel_tap/hac.h b/regression/spectraflux/src/kernel_tap/hac.h new file mode 100644 index 0000000..1bb53ba --- /dev/null +++ b/regression/spectraflux/src/kernel_tap/hac.h @@ -0,0 +1,72 @@ +#pragma once + +// #include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX_NUM_SPECTRA 300 +#define Dhv 2048 +#define f 4400 +#define Q 16 +#define MAX_PEAKS 64 +#define DISTANCE_THRESHOLD (0.25 * Dhv) +#define CLUSTER_SIZE 300 + +#define MAX_BATCH_SIZE 1024 + +#define
N_CLUSTERING 4 + +#define PEAK_PORT_W 1024 +#define PEAK_LOOP_CNT ( (MAX_PEAKS+(PEAK_PORT_W/32)-1) / (PEAK_PORT_W/32) ) + +#define LOG_FUNCTION(X) (\ + X <= 15 ? 4 : \ + X <= 31 ? 5 : \ + X <= 63 ? 6 : \ + X <= 127 ? 7 : \ + X <= 255 ? 8 : \ + X <= 511 ? 9 : \ + X <= 1023 ? 10 : \ + X <= 2047 ? 11 : \ + X <= 4095 ? 12 : \ + X <= 8191 ? 13 : \ + X <= 16383 ? 14 : \ + X <= 65535 ? 16 : 32 \ + ) + + +#define LOG_MAX_PEAKS ( LOG_FUNCTION(MAX_PEAKS) ) +#define LOG_Dhv ( LOG_FUNCTION(Dhv) ) +#define LOG_MAX_NUM_SPECTRA ( LOG_FUNCTION(MAX_NUM_SPECTRA) ) + + +typedef ap_uint bitset_dhv; +typedef ap_uint peak_cnt_t; + +typedef ap_uint distance_t; +typedef ap_uint acc_distance_t; +typedef ap_uint acc_thres_distance_t; + + +struct Cluster { + int next; + int end; + int num_elements; + int check_cluster; +}; + +struct PeakBuffer +{ + // int mz[MAX_PEAKS]; + // int intensity[MAX_PEAKS]; + int data[MAX_PEAKS]; +}; + +typedef bitset_dhv EncodedBuffer; \ No newline at end of file diff --git a/regression/spectraflux/src/kernel_tapa_PCIe/hac.h b/regression/spectraflux/src/kernel_tapa_PCIe/hac.h new file mode 100644 index 0000000..223d202 --- /dev/null +++ b/regression/spectraflux/src/kernel_tapa_PCIe/hac.h @@ -0,0 +1,72 @@ +#pragma once + +// #include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX_NUM_SPECTRA 300 +#define Dhv 2048 +#define f 4400 +#define Q 16 +#define MAX_PEAKS 50 +#define DISTANCE_THRESHOLD (0.25 * Dhv) +#define CLUSTER_SIZE 300 + +#define MAX_BATCH_SIZE 1024 + +#define N_CLUSTERING 4 + +#define PEAK_PORT_W 1024 +#define PEAK_LOOP_CNT ( (MAX_PEAKS+(PEAK_PORT_W/32)-1) / (PEAK_PORT_W/32) ) + +#define LOG_FUNCTION(X) (\ + X <= 15 ? 4 : \ + X <= 31 ? 5 : \ + X <= 63 ? 6 : \ + X <= 127 ? 7 : \ + X <= 255 ? 8 : \ + X <= 511 ? 9 : \ + X <= 1023 ? 10 : \ + X <= 2047 ? 11 : \ + X <= 4095 ? 12 : \ + X <= 8191 ? 13 : \ + X <= 16383 ? 14 : \ + X <= 65535 ? 
16 : 32 \ + ) + + +#define LOG_MAX_PEAKS ( LOG_FUNCTION(MAX_PEAKS) ) +#define LOG_Dhv ( LOG_FUNCTION(Dhv) ) +#define LOG_MAX_NUM_SPECTRA ( LOG_FUNCTION(MAX_NUM_SPECTRA) ) + + +typedef ap_uint bitset_dhv; +typedef ap_uint peak_cnt_t; + +typedef ap_uint distance_t; +typedef ap_uint acc_distance_t; +typedef ap_uint acc_thres_distance_t; + + +struct Cluster { + int next; + int end; + int num_elements; + int check_cluster; +}; + +struct PeakBuffer +{ + int mz[MAX_PEAKS]; + int intensity[MAX_PEAKS]; + // int data[MAX_PEAKS]; +}; + +typedef ap_uint<1024> EncodedBuffer; \ No newline at end of file diff --git a/regression/spectraflux/xrt.ini b/regression/spectraflux/xrt.ini new file mode 100644 index 0000000..b3f14ea --- /dev/null +++ b/regression/spectraflux/xrt.ini @@ -0,0 +1,7 @@ +[Debug] +profile=true +timeline_trace=true +data_transfer_trace=coarse +opencl_trace = true +device_trace = coarse +