-
Notifications
You must be signed in to change notification settings - Fork 225
/
Copy pathBaseRestRemoteClient.h
1305 lines (1234 loc) · 56.9 KB
/
BaseRestRemoteClient.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/****************************************************************-*- C++ -*-****
* Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. *
* All rights reserved. *
* *
* This source code and the accompanying materials are made available under *
* the terms of the Apache License 2.0 which accompanies this distribution. *
******************************************************************************/
#pragma once
#include "common/ArgumentConversion.h"
#include "common/Environment.h"
#include "common/JsonConvert.h"
#include "common/Logger.h"
#include "common/NvqcConfig.h"
#include "common/RemoteKernelExecutor.h"
#include "common/RestClient.h"
#include "common/RuntimeMLIR.h"
#include "common/UnzipUtils.h"
#include "cudaq.h"
#include "cudaq/Frontend/nvqpp/AttributeNames.h"
#include "cudaq/Optimizer/Builder/Runtime.h"
#include "cudaq/Optimizer/CodeGen/OpenQASMEmitter.h"
#include "cudaq/Optimizer/CodeGen/Passes.h"
#include "cudaq/Optimizer/CodeGen/Pipelines.h"
#include "cudaq/Optimizer/Dialect/CC/CCDialect.h"
#include "cudaq/Optimizer/Dialect/CC/CCOps.h"
#include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h"
#include "cudaq/Optimizer/Transforms/Passes.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/Module.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Support/Base64.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/Math/IR/Math.h"
#include "mlir/ExecutionEngine/ExecutionEngine.h"
#include "mlir/ExecutionEngine/OptUtils.h"
#include "mlir/IR/ImplicitLocOpBuilder.h"
#include "mlir/Parser/Parser.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Pass/PassRegistry.h"
#include <cstdlib>
#include <cxxabi.h>
#include <dlfcn.h>
#include <fstream>
#include <iostream>
#include <limits>
#include <regex>
#include <streambuf>
namespace {
/// RAII helper that runs a user-supplied callable exactly once, when the
/// object is destroyed (i.e., when it goes out of scope). Useful for ad-hoc
/// cleanup actions without writing a dedicated type.
// Usage:
// ```
// {
//   ScopeExit cleanUp(f);
//   ...
// } <- f() is invoked here to perform the cleanup action.
// ```
struct ScopeExit {
  ScopeExit(std::function<void()> &&func) : m_callback(std::move(func)) {}
  // Invoke the callback unconditionally on destruction. Marked noexcept: a
  // throwing cleanup action would terminate the program.
  ~ScopeExit() noexcept { m_callback(); }
  // Neither copyable nor movable: the callback fires exactly once, for the
  // scope in which the guard was created.
  ScopeExit(const ScopeExit &) = delete;
  ScopeExit(ScopeExit &&) = delete;
  ScopeExit &operator=(const ScopeExit &) = delete;
  ScopeExit &operator=(ScopeExit &&) = delete;

private:
  std::function<void()> m_callback; // cleanup action to run at scope exit
};
} // namespace
namespace cudaq {
// Base class for REST clients that submit kernel execution jobs to a remote
// server (the `remote-mqpu` target).
class BaseRemoteRestRuntimeClient : public cudaq::RemoteRuntimeClient {
protected:
  // Base URL of the remote server; populated from the "url" entry handed to
  // `setConfig`.
  std::string m_url;
  // MLIR passes to run on the client before submission and passes requested
  // of the server (sent in each request). Both lists are currently empty
  // placeholders.
  static inline const std::vector<std::string> clientPasses = {};
  static inline const std::vector<std::string> serverPasses = {};
  // Random number generator.
  // Used to draw a fresh seed for every outgoing request; re-seeded by
  // `resetRemoteRandomSeed` when `cudaq::set_random_seed` is called.
  std::mt19937 randEngine{std::random_device{}()};
  // Execution-context names this client refuses to service.
  static constexpr std::array<std::string_view, 1>
      DISALLOWED_EXECUTION_CONTEXT = {"tracer"};
  // Return true if the given execution-context name is in the disallowed
  // list above.
  static constexpr bool isDisallowed(std::string_view context) {
    return std::any_of(DISALLOWED_EXECUTION_CONTEXT.begin(),
                       DISALLOWED_EXECUTION_CONTEXT.end(),
                       [context](std::string_view disallowed) {
                         return disallowed == context;
                       });
  }
  /// @brief Flag indicating whether we should enable MLIR printing before and
  /// after each pass. This is similar to `-mlir-print-ir-before-all` and
  /// `-mlir-print-ir-after-all` in `cudaq-opt`.
  bool enablePrintMLIREachPass = false;

public:
// Apply client configuration. Only the "url" entry is consumed here; it
// becomes the base URL for job submission.
virtual void setConfig(
    const std::unordered_map<std::string, std::string> &configs) override {
  if (auto urlIter = configs.find("url"); urlIter != configs.end())
    m_url = urlIter->second;
}
// REST payload version spoken by this client. The environment variable
// CUDAQ_REST_CLIENT_VERSION, when set, overrides the compiled-in value.
virtual int version() const override {
  const char *envVal = std::getenv("CUDAQ_REST_CLIENT_VERSION");
  // Fall back to the version defined in the code when no override is set.
  return envVal ? std::stoi(envVal)
                : cudaq::RestRequest::REST_PAYLOAD_VERSION;
}
// Construct the base64-encoded kernel payload to ship to the remote server.
// - Library mode: returns the LLVM bitcode section embedded in the current
//   executable (empty string if no bitcode section is found).
// - MLIR mode: parses the kernel's Quake code, optionally synthesizes
//   arguments (`rawArgs` via argument synthesis, or `args` via quake-synth,
//   both starting at `startingArgIdx`), runs client-side passes plus the
//   QIR-conversion pipeline, and returns the printed MLIR module.
// Throws std::runtime_error on load/parse/pass failures.
std::string constructKernelPayload(
    mlir::MLIRContext &mlirContext, const std::string &name,
    void (*kernelFunc)(void *), const void *args, std::uint64_t voidStarSize,
    std::size_t startingArgIdx, const std::vector<void *> *rawArgs) {
  enablePrintMLIREachPass =
      getEnvBool("CUDAQ_MLIR_PRINT_EACH_PASS", enablePrintMLIREachPass);
  if (cudaq::__internal__::isLibraryMode(name)) {
    // Library mode: retrieve the embedded bitcode in the executable.
    const auto path = llvm::sys::fs::getMainExecutable(nullptr, nullptr);
    // Load the object file
    auto [objBin, objBuffer] =
        llvm::cantFail(llvm::object::ObjectFile::createObjectFile(path))
            .takeBinary();
    if (!objBin)
      throw std::runtime_error("Failed to load binary object file");
    // Scan the object's sections for the embedded bitcode.
    // (Fixes a mangled loop variable: "&sect;ion" -> "&section".)
    for (const auto &section : objBin->sections()) {
      // Get the bitcode section
      if (section.isBitcode()) {
        llvm::MemoryBufferRef llvmBc(llvm::cantFail(section.getContents()),
                                     "Bitcode");
        return llvm::encodeBase64(llvmBc.getBuffer());
      }
    }
    // No bitcode section present: return an empty payload.
    return "";
  } else {
    // Get the quake representation of the kernel
    auto quakeCode = cudaq::get_quake_by_name(name);
    auto module = parseSourceString<mlir::ModuleOp>(quakeCode, &mlirContext);
    if (!module)
      throw std::runtime_error("module cannot be parsed");
    // Extract the kernel name
    auto func = module->lookupSymbol<mlir::func::FuncOp>(
        std::string("__nvqpp__mlirgen__") + name);
    // Create a new Module to clone the function into
    auto location =
        mlir::FileLineColLoc::get(&mlirContext, "<builder>", 1, 1);
    mlir::ImplicitLocOpBuilder builder(location, &mlirContext);
    // Add CUDA-Q kernel attribute if not already set.
    if (!func->hasAttr(cudaq::kernelAttrName))
      func->setAttr(cudaq::kernelAttrName, builder.getUnitAttr());
    // Add entry-point attribute if not already set.
    if (!func->hasAttr(cudaq::entryPointAttrName))
      func->setAttr(cudaq::entryPointAttrName, builder.getUnitAttr());
    auto moduleOp = builder.create<mlir::ModuleOp>();
    moduleOp->setAttrs((*module)->getAttrDictionary());
    for (auto &op : *module) {
      if (auto funcOp = dyn_cast<mlir::func::FuncOp>(op)) {
        // Add quantum kernels defined in the module.
        if (funcOp->hasAttr(cudaq::kernelAttrName) ||
            funcOp.getName().startswith("__nvqpp__mlirgen__") ||
            funcOp.getBody().empty())
          moduleOp.push_back(funcOp.clone());
      }
      // Add globals defined in the module.
      if (auto globalOp = dyn_cast<cc::GlobalOp>(op))
        moduleOp.push_back(globalOp.clone());
    }
    // Synthesize kernel arguments into the IR when provided.
    if (rawArgs || args) {
      mlir::PassManager pm(&mlirContext);
      if (rawArgs && !rawArgs->empty()) {
        cudaq::info("Run Argument Synth.\n");
        opt::ArgumentConverter argCon(name, moduleOp);
        argCon.gen_drop_front(*rawArgs, startingArgIdx);
        std::string kernName = runtime::cudaqGenPrefixName + name;
        mlir::SmallVector<mlir::StringRef> kernels = {kernName};
        std::string substBuff;
        llvm::raw_string_ostream ss(substBuff);
        ss << argCon.getSubstitutionModule();
        mlir::SmallVector<mlir::StringRef> substs = {substBuff};
        pm.addNestedPass<mlir::func::FuncOp>(
            opt::createArgumentSynthesisPass(kernels, substs));
        pm.addPass(mlir::createCanonicalizerPass());
        pm.addPass(opt::createDeleteStates());
      } else if (args) {
        cudaq::info("Run Quake Synth.\n");
        pm.addPass(opt::createQuakeSynthesizer(name, args, startingArgIdx));
      }
      pm.addPass(mlir::createCanonicalizerPass());
      if (enablePrintMLIREachPass) {
        // IR printing requires a single-threaded MLIR context.
        moduleOp.getContext()->disableMultithreading();
        pm.enableIRPrinting();
      }
      if (failed(pm.run(moduleOp)))
        throw std::runtime_error("Could not successfully apply quake-synth.");
    }
    // Note: do not run state preparation pass here since we are always
    // using simulators.
    // Run client-side passes. `clientPasses` is empty right now, but the code
    // below accommodates putting passes into it.
    mlir::PassManager pm(&mlirContext);
    std::string errMsg;
    llvm::raw_string_ostream os(errMsg);
    const std::string pipeline =
        std::accumulate(clientPasses.begin(), clientPasses.end(),
                        std::string(), [](const auto &ss, const auto &s) {
                          return ss.empty() ? s : ss + "," + s;
                        });
    if (enablePrintMLIREachPass) {
      moduleOp.getContext()->disableMultithreading();
      pm.enableIRPrinting();
    }
    if (failed(parsePassPipeline(pipeline, pm, os)))
      throw std::runtime_error(
          "Remote rest platform failed to add passes to pipeline (" + errMsg +
          ").");
    opt::addPipelineConvertToQIR(pm);
    if (failed(pm.run(moduleOp)))
      throw std::runtime_error(
          "Remote rest platform: applying IR passes failed.");
    // Print the lowered module (including debug info) and base64-encode it.
    std::string mlirCode;
    llvm::raw_string_ostream outStr(mlirCode);
    mlir::OpPrintingFlags opf;
    opf.enableDebugInfo(/*enable=*/true,
                        /*pretty=*/false);
    moduleOp.print(outStr, opf);
    return llvm::encodeBase64(mlirCode);
  }
}
// Construct a REST request describing a remote VQE optimization job.
// Optimizer/gradient objects are attached to the request's `opt` fields.
// The kernel payload is built with `startingArgIdx = 1`, i.e., the kernel's
// first argument (presumably the variational parameter vector — driven by
// the server-side optimizer) is left unsynthesized.
cudaq::RestRequest constructVQEJobRequest(
    mlir::MLIRContext &mlirContext, cudaq::ExecutionContext &io_context,
    const std::string &backendSimName, const std::string &kernelName,
    const void *kernelArgs, cudaq::gradient *gradient,
    cudaq::optimizer &optimizer, const int n_params,
    const std::vector<void *> *rawArgs) {
  cudaq::RestRequest request(io_context, version());
  // Populate the VQE-specific optimizer/gradient fields.
  request.opt = RestRequestOptFields();
  request.opt->optimizer_n_params = n_params;
  request.opt->optimizer_type = get_optimizer_type(optimizer);
  request.opt->optimizer_ptr = &optimizer;
  request.opt->gradient_ptr = gradient;
  if (gradient)
    request.opt->gradient_type = get_gradient_type(*gradient);
  request.entryPoint = kernelName;
  request.passes = serverPasses;
  request.format = cudaq::CodeFormat::MLIR;
  request.code =
      constructKernelPayload(mlirContext, kernelName, /*kernelFunc=*/nullptr,
                             /*kernelArgs=*/kernelArgs,
                             /*argsSize=*/0, /*startingArgIdx=*/1, rawArgs);
  request.simulator = backendSimName;
  // Remote server seed
  // Note: unlike local executions whereby a static instance of the simulator
  // is seeded once when `cudaq::set_random_seed` is called, thus not being
  // re-seeded between executions. For remote executions, we use the runtime
  // level seed value to seed a random number generator to seed the server.
  // i.e., consecutive remote executions on the server from the same client
  // session (where `cudaq::set_random_seed` is called), get new random seeds
  // for each execution. The sequence is still deterministic based on the
  // runtime-level seed value.
  request.seed = [&]() {
    std::uniform_int_distribution<std::size_t> seedGen(
        std::numeric_limits<std::size_t>::min(),
        std::numeric_limits<std::size_t>::max());
    return seedGen(randEngine);
  }();
  return request;
}
// Construct a REST request for a generic (non-VQE) kernel execution job.
// Payload selection:
// - Library mode: LLVM code format; raw argument bytes are copied into the
//   request and the entry point is resolved from the kernel function pointer
//   via `dladdr`.
// - Otherwise: MLIR code format with the synthesized kernel payload.
// - "state-overlap" contexts carry two kernel payloads, one per input state.
cudaq::RestRequest constructJobRequest(
    mlir::MLIRContext &mlirContext, cudaq::ExecutionContext &io_context,
    cudaq::SerializedCodeExecutionContext *serializedCodeContext,
    const std::string &backendSimName, const std::string &kernelName,
    void (*kernelFunc)(void *), const void *kernelArgs,
    std::uint64_t argsSize, const std::vector<void *> *rawArgs) {
  cudaq::RestRequest request(io_context, version());
  if (serializedCodeContext)
    request.serializedCodeExecutionContext = *serializedCodeContext;
  request.entryPoint = kernelName;
  if (cudaq::__internal__::isLibraryMode(kernelName)) {
    request.format = cudaq::CodeFormat::LLVM;
    if (kernelArgs && argsSize > 0) {
      cudaq::info("Serialize {} bytes of args.", argsSize);
      request.args.resize(argsSize);
      std::memcpy(request.args.data(), kernelArgs, argsSize);
    }
    if (kernelFunc) {
      // Resolve the symbol name of the kernel function pointer so the server
      // can locate the same entry point.
      ::Dl_info info;
      ::dladdr(reinterpret_cast<void *>(kernelFunc), &info);
      const auto funcName = cudaq::quantum_platform::demangle(info.dli_sname);
      cudaq::info("RemoteSimulatorQPU: retrieve name '{}' for kernel {}",
                  funcName, kernelName);
      request.entryPoint = funcName;
    }
  } else {
    request.passes = serverPasses;
    request.format = cudaq::CodeFormat::MLIR;
  }
  if (io_context.name == "state-overlap") {
    // Overlap calculations require both input states to be
    // RemoteSimulationState instances that can report their kernel info.
    if (!io_context.overlapComputeStates.has_value())
      throw std::runtime_error("Invalid execution context: no input states");
    const auto *castedState1 = dynamic_cast<const RemoteSimulationState *>(
        io_context.overlapComputeStates->first);
    const auto *castedState2 = dynamic_cast<const RemoteSimulationState *>(
        io_context.overlapComputeStates->second);
    if (!castedState1 || !castedState2)
      throw std::runtime_error(
          "Invalid execution context: input states are not compatible");
    auto [kernelName1, args1] = castedState1->getKernelInfo();
    auto [kernelName2, args2] = castedState2->getKernelInfo();
    // Build one IR payload per state-preparation kernel.
    cudaq::IRPayLoad stateIrPayload1, stateIrPayload2;
    stateIrPayload1.entryPoint = kernelName1;
    stateIrPayload1.ir =
        constructKernelPayload(mlirContext, kernelName1, nullptr, nullptr, 0,
                               /*startingArgIdx=*/0, &args1);
    stateIrPayload2.entryPoint = kernelName2;
    stateIrPayload2.ir =
        constructKernelPayload(mlirContext, kernelName2, nullptr, nullptr, 0,
                               /*startingArgIdx=*/0, &args2);
    // First kernel of the overlap calculation
    request.code = stateIrPayload1.ir;
    request.entryPoint = stateIrPayload1.entryPoint;
    // Second kernel of the overlap calculation
    request.overlapKernel = stateIrPayload2;
  } else if (serializedCodeContext == nullptr) {
    // Standard path: single kernel payload (skipped when the job carries
    // serialized source code instead).
    request.code = constructKernelPayload(mlirContext, kernelName, kernelFunc,
                                          kernelArgs, argsSize,
                                          /*startingArgIdx=*/0, rawArgs);
  }
  request.simulator = backendSimName;
  // Remote server seed
  // Note: unlike local executions whereby a static instance of the simulator
  // is seeded once when `cudaq::set_random_seed` is called, thus not being
  // re-seeded between executions. For remote executions, we use the runtime
  // level seed value to seed a random number generator to seed the server.
  // i.e., consecutive remote executions on the server from the same client
  // session (where `cudaq::set_random_seed` is called), get new random seeds
  // for each execution. The sequence is still deterministic based on the
  // runtime-level seed value.
  request.seed = [&]() {
    std::uniform_int_distribution<std::size_t> seedGen(
        std::numeric_limits<std::size_t>::min(),
        std::numeric_limits<std::size_t>::max());
    return seedGen(randEngine);
  }();
  return request;
}
// Build the REST request for this execution (VQE or generic) and POST it to
// the server's "job" endpoint. On success, the server's "executionContext"
// response field is deserialized back into `io_context` and true is
// returned. On failure, returns false and (if non-null) fills
// `optionalErrorMsg`. Throws for disallowed execution contexts.
virtual bool
sendRequest(mlir::MLIRContext &mlirContext,
            cudaq::ExecutionContext &io_context,
            cudaq::SerializedCodeExecutionContext *serializedCodeContext,
            cudaq::gradient *vqe_gradient, cudaq::optimizer *vqe_optimizer,
            const int vqe_n_params, const std::string &backendSimName,
            const std::string &kernelName, void (*kernelFunc)(void *),
            const void *kernelArgs, std::uint64_t argsSize,
            std::string *optionalErrorMsg,
            const std::vector<void *> *rawArgs) override {
  if (isDisallowed(io_context.name))
    throw std::runtime_error(
        io_context.name +
        " operation is not supported with cudaq target remote-mqpu!");
  // VQE jobs (vqe_n_params > 0) carry optimizer info; everything else goes
  // through the generic job-request path.
  cudaq::RestRequest request = [&]() {
    if (vqe_n_params > 0)
      return constructVQEJobRequest(mlirContext, io_context, backendSimName,
                                    kernelName, kernelArgs, vqe_gradient,
                                    *vqe_optimizer, vqe_n_params, rawArgs);
    return constructJobRequest(mlirContext, io_context, serializedCodeContext,
                               backendSimName, kernelName, kernelFunc,
                               kernelArgs, argsSize, rawArgs);
  }();
  if (request.code.empty() && (serializedCodeContext == nullptr ||
                               serializedCodeContext->source_code.empty())) {
    if (optionalErrorMsg)
      *optionalErrorMsg =
          std::string(
              "Failed to construct/retrieve kernel IR for kernel named ") +
          kernelName;
    return false;
  }
  // Don't let curl adding "Expect: 100-continue" header, which is not
  // suitable for large requests, e.g., bitcode in the JSON request.
  // Ref: https://gms.tf/when-curl-sends-100-continue.html
  std::map<std::string, std::string> headers{
      {"Expect:", ""}, {"Content-type", "application/json"}};
  json requestJson = request;
  try {
    cudaq::RestClient restClient;
    auto resultJs =
        restClient.post(m_url, "job", requestJson, headers, false);
    cudaq::debug("Response: {}", resultJs.dump(/*indent=*/2));
    if (!resultJs.contains("executionContext")) {
      // Assemble the most informative error message the response allows.
      std::stringstream errorMsg;
      if (resultJs.contains("status")) {
        errorMsg << "Failed to execute the kernel on the remote server: "
                 << resultJs["status"] << "\n";
        if (resultJs.contains("errorMessage")) {
          errorMsg << "Error message: " << resultJs["errorMessage"] << "\n";
        }
      } else {
        errorMsg << "Failed to execute the kernel on the remote server.\n";
        errorMsg << "Unexpected response from the REST server. Missing the "
                    "required field 'executionContext'.";
      }
      if (optionalErrorMsg)
        *optionalErrorMsg = errorMsg.str();
      return false;
    }
    resultJs["executionContext"].get_to(io_context);
    return true;
  } catch (const std::exception &e) {
    if (optionalErrorMsg)
      *optionalErrorMsg = e.what();
    return false;
  } catch (...) {
    // Best-effort reporting for non-std exceptions: demangle the active
    // exception's type name.
    std::string exType = __cxxabiv1::__cxa_current_exception_type()->name();
    // __cxa_demangle returns a malloc'd buffer the caller must free
    // (previously leaked here).
    char *demangledPtr =
        __cxxabiv1::__cxa_demangle(exType.c_str(), nullptr, nullptr, nullptr);
    if (demangledPtr && optionalErrorMsg) {
      std::string demangledName(demangledPtr);
      *optionalErrorMsg = "Unhandled exception of type " + demangledName;
    } else if (optionalErrorMsg) {
      *optionalErrorMsg = "Unhandled exception of unknown type";
    }
    std::free(demangledPtr); // free(nullptr) is a no-op
    return false;
  }
}
// Re-seed the client-side engine that generates per-request server seeds.
virtual void resetRemoteRandomSeed(std::size_t seed) override {
  // Re-seed the generator, e.g., when `cudaq::set_random_seed` is called.
  randEngine.seed(seed);
}
// The remote-mqpu backend (this class) reports every remote capability as
// supported unless disabled via environment variable.
virtual RemoteCapabilities getRemoteCapabilities() const override {
  // All capabilities default to enabled; setting
  // CUDAQ_CLIENT_REMOTE_CAPABILITY_OVERRIDE=false turns them all off.
  const bool enableAll =
      getEnvBool("CUDAQ_CLIENT_REMOTE_CAPABILITY_OVERRIDE", true);
  return RemoteCapabilities(/*initValues=*/enableAll);
}
};
/// Base class for the REST client submitting jobs to NVCF-hosted `cudaq-qpud`
/// service.
class BaseNvcfRuntimeClient : public cudaq::BaseRemoteRestRuntimeClient {
protected:
  // None: Don't log; Info: basic info; Trace: Timing data per invocation.
  enum class LogLevel : int { None = 0, Info, Trace };
  // NVQC logging level
  // Enabled high-level info log by default (can be set by an environment
  // variable)
  LogLevel m_logLevel = LogLevel::Info;
  // API key for authentication
  std::string m_apiKey;
  // Rest client to send HTTP request
  cudaq::RestClient m_restClient;
  // NVCF function Id to use
  std::string m_functionId;
  // NVCF version Id of that function to use
  std::string m_functionVersionId;
  // Information about function deployment from environment variable info.
  struct FunctionEnvironments {
    // These configs should be positive numbers.
    // A value of -1 means "unknown / not provided".
    int majorVersion{-1};
    int minorVersion{-1};
    int numGpus{-1};
    int timeoutSecs{-1};
    int hasSerializedCodeExec{-1}; // -1 means unknown; 0 = false, 1 = true
    std::string name;
  };
  // Available functions: function Id to info mapping
  using DeploymentInfo = std::unordered_map<std::string, FunctionEnvironments>;
  DeploymentInfo m_availableFuncs;
  // NCA Id used to select NVQC deployments (see getAllAvailableDeployments).
  const std::string CUDAQ_NCA_ID = cudaq::getNvqcNcaId();
  // Base URL for NVCF APIs
  static inline const std::string m_baseUrl = "api.nvcf.nvidia.com/v2";
// Build the NVCF invocation URL for the configured function Id and version.
std::string nvcfInvocationUrl() const {
  const auto url =
      fmt::format("https://{}/nvcf/exec/functions/{}/versions/{}", m_baseUrl,
                  m_functionId, m_functionVersionId);
  return url;
}
// Build the NVCF URL used to request an asset-upload link.
std::string nvcfAssetUrl() const {
  const auto url = fmt::format("https://{}/nvcf/assets", m_baseUrl);
  return url;
}
// Build the NVCF URL used to poll status/result of an in-flight request.
std::string
nvcfInvocationStatus(const std::string &invocationRequestId) const {
  const auto url = fmt::format("https://{}/nvcf/exec/status/{}", m_baseUrl,
                               invocationRequestId);
  return url;
}
// Construct the HTTP headers required for NVCF REST API calls
// (authorization, content type, and gateway-timeout/polling options).
std::map<std::string, std::string> getHeaders() const {
  std::map<std::string, std::string> headers;
  headers["Authorization"] = fmt::format("Bearer {}", m_apiKey);
  headers["Content-type"] = "application/json";
  headers["nvcf-feature-enable-gateway-timeout"] = "true";
  // The max timeout for the polling response is 20 minutes
  // https://docs.nvidia.com/cloud-functions/user-guide/latest/cloud-function/api.html#http-polling
  headers["NVCF-POLL-SECONDS"] = "1200";
  return headers;
}
// Helper to retrieve the list of all available versions of the specified
// function Id.
std::vector<cudaq::NvcfFunctionVersionInfo> getFunctionVersions() {
  auto headers = getHeaders();
  // GET https://{base}/nvcf/functions/{functionId}/versions
  auto versionDataJs = m_restClient.get(
      fmt::format("https://{}/nvcf/functions/{}", m_baseUrl, m_functionId),
      "/versions", headers, /*enableSsl=*/true);
  cudaq::info("Version data: {}", versionDataJs.dump());
  // Deserialize the "functions" array into version-info records.
  std::vector<cudaq::NvcfFunctionVersionInfo> versions;
  versionDataJs["functions"].get_to(versions);
  return versions;
}
// Query NVCF for all function deployments visible to the current API key and
// return those this client can use (function Id -> FunctionEnvironments).
// `functionOverride`/`versionOverride` (function Id / version Id) force a
// matching function to be included even if its name does not follow the
// cuda_quantum naming convention or its version does not match the client.
// NOTE(review): the json range-for loops below iterate by value (copies);
// left as-is since nlohmann's const operator[] has different missing-key
// semantics — confirm before "optimizing" to const references.
DeploymentInfo
getAllAvailableDeployments(const std::string &functionOverride,
                           const std::string &versionOverride) {
  auto headers = getHeaders();
  auto allVisibleFunctions =
      m_restClient.get(fmt::format("https://{}/nvcf/functions", m_baseUrl),
                       "", headers, /*enableSsl=*/true);
  const std::string cudaqNvcfFuncNamePrefix = "cuda_quantum";
  DeploymentInfo info;
  // NCA ID Precedence order is:
  // 1. CUDAQ_NCA_ID if it was specifically overriden
  // 2. Dev org NCA ID if active dev functions are visible with selected key
  // 3. Production NCA ID
  const std::string ncaIdToSearch = [&]() {
    // Check for override
    if (isNvqcNcaIdOverridden())
      return CUDAQ_NCA_ID;
    // Check to see if dev NCA ID functions are available
    for (auto funcInfo : allVisibleFunctions["functions"]) {
      if (funcInfo["ncaId"].get<std::string>() ==
              std::string(DEV_NVQC_NCA_ID) &&
          funcInfo["status"].get<std::string>() == "ACTIVE" &&
          funcInfo["name"].get<std::string>().starts_with(
              cudaqNvcfFuncNamePrefix)) {
        return std::string(DEV_NVQC_NCA_ID);
      }
    }
    // Fallback on production NCA ID
    return CUDAQ_NCA_ID;
  }();
  // Only add functions that are the latest minor version for the major
  // version matched by the client.
  // I.e. If client 1.x sees server 1.2 and 1.3, choose 1.3.
  int highestMinorVersion = 0;
  for (auto funcInfo : allVisibleFunctions["functions"]) {
    // A function qualifies either by explicit user override (Id/version) or
    // by matching the selected NCA Id, ACTIVE status, and naming prefix.
    bool matchesOverride =
        funcInfo["id"].get<std::string>() == functionOverride ||
        funcInfo["versionId"].get<std::string>() == versionOverride;
    bool matchesWithoutOverride =
        funcInfo["ncaId"].get<std::string>() == ncaIdToSearch &&
        funcInfo["status"].get<std::string>() == "ACTIVE" &&
        funcInfo["name"].get<std::string>().starts_with(
            cudaqNvcfFuncNamePrefix);
    if (matchesOverride || matchesWithoutOverride) {
      const auto containerEnvs = [&]() -> FunctionEnvironments {
        FunctionEnvironments envs;
        // Function name convention:
        // Example: cuda_quantum_v1_t3600_8x
        //          ^^^^^^^^^^^^ ^^ ^^^^^ ^^
        //          Prefix       |  |     |
        //          Version _____|  |     |
        //          Timeout (secs) _|     |
        //          Number of GPUs _______|
        // Also supported: cuda_quantum_v1-1_t3600_8x
        // Also supported: cuda_quantum_suffix_v1-1_t3600_8x
        const std::regex funcNameRegex(
            R"(^cuda_quantum_.*v([\d\-]+)_t(\d+)_(\d+)x$)");
        // The first match is the whole string.
        constexpr std::size_t expectedNumMatches = 4;
        std::smatch baseMatch;
        const std::string fname = funcInfo["name"].get<std::string>();
        // Parse "x" or "x-y" into a (major, minor) version pair.
        auto getMajorMinorVersion = [](const std::string &versionStr) {
          std::size_t pos = versionStr.find('-');
          int majorVersion = 0;
          int minorVersion = 0;
          if (pos != std::string::npos) {
            majorVersion = std::stoi(versionStr.substr(0, pos));
            minorVersion = std::stoi(versionStr.substr(pos + 1));
          } else {
            // If it doesn't say x.y, then assume it is x.0
            majorVersion = std::stoi(versionStr);
            minorVersion = 0;
          }
          return std::make_pair(majorVersion, minorVersion);
        };
        // If the function name matches 'Production' naming convention,
        // retrieve deployment information from the name.
        envs.name = fname;
        if (std::regex_match(fname, baseMatch, funcNameRegex) &&
            baseMatch.size() == expectedNumMatches) {
          std::tie(envs.majorVersion, envs.minorVersion) =
              getMajorMinorVersion(baseMatch[1].str());
          envs.timeoutSecs = std::stoi(baseMatch[2].str());
          envs.numGpus = std::stoi(baseMatch[3].str());
          envs.hasSerializedCodeExec =
              fname.starts_with("cuda_quantum_remote_py") ? 1 : 0;
        } else if (funcInfo.contains("containerEnvironment")) {
          // Otherwise, retrieve the info from deployment configurations.
          // TODO: at some point, we may want to consolidate these two paths
          // (name vs. meta-data). We keep it here since function metadata
          // (similar to `containerEnvironment`) will be supported in the near
          // future.
          // Convert to unordered_map
          std::unordered_map<std::string, std::string> containerEnvironment;
          for (auto it : funcInfo["containerEnvironment"])
            containerEnvironment[it["key"].get<std::string>()] =
                it["value"].get<std::string>();
          // Fetch values
          const auto getIntIfFound = [&](const std::string &envKey,
                                         int &varToSet) {
            if (auto it = containerEnvironment.find(envKey);
                it != containerEnvironment.end())
              varToSet = std::stoi(it->second);
          };
          getIntIfFound("NUM_GPUS", envs.numGpus);
          getIntIfFound("WATCHDOG_TIMEOUT_SEC", envs.timeoutSecs);
          getIntIfFound("CUDAQ_SER_CODE_EXEC", envs.hasSerializedCodeExec);
          if (auto it =
                  containerEnvironment.find("NVQC_REST_PAYLOAD_VERSION");
              it != containerEnvironment.end())
            std::tie(envs.majorVersion, envs.minorVersion) =
                getMajorMinorVersion(it->second);
        }
        // Note: invalid/uninitialized FunctionEnvironments will be
        // discarded, i.e., not added to the valid deployment list, since the
        // API version number will not match.
        return envs;
      }();
      // Only add functions that match client version, unless overridden
      if (matchesOverride || containerEnvs.majorVersion == version()) {
        info[funcInfo["id"].get<std::string>()] = containerEnvs;
        highestMinorVersion =
            std::max(highestMinorVersion, containerEnvs.minorVersion);
      }
    }
  }
  // Now make a pass through info and remove all the lower minor versions.
  if (functionOverride.empty()) {
    std::vector<std::string> funcsToRemove;
    for (auto &iter : info)
      if (iter.second.minorVersion != highestMinorVersion)
        funcsToRemove.push_back(iter.first);
    for (auto &funcToRemove : funcsToRemove)
      info.erase(funcToRemove);
  }
  return info;
}
// Query the queue depth (number of pending jobs) for the given function Id
// and version Id. Returns std::nullopt when the depth cannot be determined;
// any failure is swallowed (best-effort query).
std::optional<std::size_t> getQueueDepth(const std::string &funcId,
                                         const std::string &verId) {
  auto headers = getHeaders();
  try {
    auto queueDepthInfo = m_restClient.get(
        fmt::format("https://{}/nvcf/queues/functions/{}/versions/{}",
                    m_baseUrl, funcId, verId),
        "", headers, /*enableSsl=*/true);
    // Validate the response refers to the requested function before reading
    // the per-version queue entries.
    if (queueDepthInfo.contains("functionId") &&
        queueDepthInfo["functionId"] == funcId &&
        queueDepthInfo.contains("queues")) {
      for (auto queueInfo : queueDepthInfo["queues"]) {
        if (queueInfo.contains("functionVersionId") &&
            queueInfo["functionVersionId"] == verId &&
            queueInfo.contains("queueDepth")) {
          return queueInfo["queueDepth"].get<std::size_t>();
        }
      }
    }
    return std::nullopt;
  } catch (...) {
    // Make this non-fatal. Returns null, i.e., unknown.
    return std::nullopt;
  }
}
// Fetch the queue position of the given request ID. If the job has already
// begun execution, it will return `std::nullopt`. Any failure is swallowed
// (best-effort query).
std::optional<std::size_t> getQueuePosition(const std::string &requestId) {
  auto headers = getHeaders();
  try {
    auto queuePos =
        m_restClient.get(fmt::format("https://{}/nvcf/queues/{}/position",
                                     m_baseUrl, requestId),
                         "", headers, /*enableSsl=*/true);
    if (queuePos.contains("positionInQueue"))
      return queuePos["positionInQueue"].get<std::size_t>();
    // When the job enters execution, it returns "status": 400 and "title":
    // "Bad Request", so translate that to `std::nullopt`.
    return std::nullopt;
  } catch (...) {
    // Make this non-fatal. Returns null, i.e., unknown.
    return std::nullopt;
  }
}
public:
  // Parse the user-provided configuration and select the NVQC function Id and
  // version Id that subsequent job submissions will target.
  //
  // Recognized `configs` keys: "api-key" (required), "function-id",
  // "version-id", and "ngpus". The NVQC_LOG_LEVEL environment variable tunes
  // client-side logging. Throws std::runtime_error when the API key is
  // missing, when no suitable deployment exists, or when a user-specified
  // function/version is invalid or not ACTIVE.
  virtual void setConfig(
      const std::unordered_map<std::string, std::string> &configs) override {
    {
      // Check if user set a specific log level (e.g., disable logging)
      if (auto logConfigEnv = std::getenv("NVQC_LOG_LEVEL")) {
        auto logConfig = std::string(logConfigEnv);
        // Normalize to lower-case so the comparisons below are
        // case-insensitive.
        std::transform(logConfig.begin(), logConfig.end(), logConfig.begin(),
                       [](unsigned char c) { return std::tolower(c); });
        if (logConfig == "0" || logConfig == "off" || logConfig == "false" ||
            logConfig == "no" || logConfig == "none")
          m_logLevel = LogLevel::None;
        if (logConfig == "trace")
          m_logLevel = LogLevel::Trace;
        if (logConfig == "info")
          m_logLevel = LogLevel::Info;
      }
    }
    {
      // The API key is mandatory; fail early if it is absent or empty.
      const auto apiKeyIter = configs.find("api-key");
      if (apiKeyIter != configs.end())
        m_apiKey = apiKeyIter->second;
      if (m_apiKey.empty())
        throw std::runtime_error("No NVQC API key is provided.");
    }
    // Save some iterators to be used later
    const auto funcIdIter = configs.find("function-id");
    const auto versionIdIter = configs.find("version-id");
    const auto nGpusIter = configs.find("ngpus");
    // Default is 1 GPU if none specified
    const int numGpusRequested =
        (nGpusIter != configs.end()) ? std::stoi(nGpusIter->second) : 1;
    // Override strings for function id and function version
    const auto functionOverride = [&]() -> std::string {
      if (funcIdIter == configs.end())
        return "";
      return funcIdIter->second;
    }();
    const auto versionOverride = [&]() -> std::string {
      if (versionIdIter == configs.end())
        return "";
      return versionIdIter->second;
    }();
    // Pass the optional overrides to getAllAvailableDeployments so that it will
    // return information about functions if they are manually specified by the
    // user, even if they don't conform to naming conventions.
    m_availableFuncs =
        getAllAvailableDeployments(functionOverride, versionOverride);
    for (const auto &[funcId, info] : m_availableFuncs)
      cudaq::info("Function Id {} (API version {}.{}) has {} GPUs.", funcId,
                  info.majorVersion, info.minorVersion, info.numGpus);
    {
      if (funcIdIter != configs.end()) {
        // User overrides a specific function Id.
        m_functionId = funcIdIter->second;
        if (m_logLevel > LogLevel::None) {
          // Print out the configuration
          cudaq::log("Submitting jobs to NVQC using function Id {}.",
                     m_functionId);
        }
      } else {
        // Output an error message if no deployments can be found.
        if (m_availableFuncs.empty())
          throw std::runtime_error(
              "Unable to find any active NVQC deployments for this key. Check "
              "if you see any active functions on ngc.nvidia.com in the cloud "
              "functions tab, or try to regenerate the key.");
        // Determine the function Id based on the number of GPUs
        cudaq::info("Looking for an NVQC deployment that has {} GPUs.",
                    numGpusRequested);
        // Pick the first deployment whose GPU count matches the request.
        for (const auto &[funcId, info] : m_availableFuncs) {
          if (info.numGpus == numGpusRequested) {
            m_functionId = funcId;
            if (m_logLevel > LogLevel::None) {
              // Print out the configuration
              cudaq::log(
                  "Submitting jobs to NVQC service with {} GPU(s). Max "
                  "execution time: {} seconds (excluding queue wait time).",
                  info.numGpus, info.timeoutSecs);
            }
            break;
          }
        }
        // No deployment matched the requested GPU count: report the counts
        // that are available so the user can adjust their settings.
        if (m_functionId.empty()) {
          // Make sure that we sort the GPU count list
          std::set<std::size_t> gpuCounts;
          for (const auto &[funcId, info] : m_availableFuncs) {
            gpuCounts.emplace(info.numGpus);
          }
          std::stringstream ss;
          ss << "Unable to find NVQC deployment with " << numGpusRequested
             << " GPUs.\nAvailable deployments have ";
          ss << fmt::format("{}", gpuCounts) << " GPUs.\n";
          ss << "Please check your 'ngpus' value (Python) or `--nvqc-ngpus` "
                "value (C++).\n";
          throw std::runtime_error(ss.str());
        }
      }
    }
    {
      // Resolve which version of the selected function Id to use.
      auto versions = getFunctionVersions();
      // Check if a version Id is set
      if (versionIdIter != configs.end()) {
        m_functionVersionId = versionIdIter->second;
        // Do a sanity check that this is an active version (i.e., usable).
        const auto versionInfoIter =
            std::find_if(versions.begin(), versions.end(),
                         [&](const cudaq::NvcfFunctionVersionInfo &info) {
                           return info.versionId == m_functionVersionId;
                         });
        // Invalid version Id.
        if (versionInfoIter == versions.end())
          throw std::runtime_error(
              fmt::format("Version Id '{}' is not valid for NVQC function Id "
                          "'{}'. Please check your NVQC configurations.",
                          m_functionVersionId, m_functionId));
        // The version is not active/deployed.
        if (versionInfoIter->status != cudaq::FunctionStatus::ACTIVE)
          throw std::runtime_error(
              fmt::format("Version Id '{}' of NVQC function Id "
                          "'{}' is not ACTIVE. Please check your NVQC "
                          "configurations or contact support.",
                          m_functionVersionId, m_functionId));
      } else {
        // No version Id is set. Just pick the latest version of the function
        // Id. The timestamp is an ISO 8601 string, e.g.,
        // 2024-01-25T04:14:46.360Z. To sort it from latest to oldest, we can
        // use string sorting.
        std::sort(versions.begin(), versions.end(),
                  [](const auto &a, const auto &b) {
                    return a.createdAt > b.createdAt;
                  });
        for (const auto &versionInfo : versions)
          cudaq::info("Found version Id {}, created at {}",
                      versionInfo.versionId, versionInfo.createdAt);
        // Only ACTIVE versions are usable; lazily filter the sorted list.
        auto activeVersions =
            versions |
            std::ranges::views::filter(
                [](const cudaq::NvcfFunctionVersionInfo &info) {
                  return info.status == cudaq::FunctionStatus::ACTIVE;
                });
        if (activeVersions.empty())
          throw std::runtime_error(
              fmt::format("No active version available for NVQC function Id "
                          "'{}'. Please check your function Id.",
                          m_functionId));
        // The list is sorted newest-first, so the first active entry is the
        // latest usable version.
        m_functionVersionId = activeVersions.front().versionId;
        cudaq::info("Selected the latest version Id {} for function Id {}",
                    m_functionVersionId, m_functionId);
      }
    }
  }
// The NVCF version of this function needs to dynamically determine the remote
// capabilities based on the servers currently deployed.
virtual RemoteCapabilities getRemoteCapabilities() const override {
// Allow the user to override to all true.
if (getEnvBool("CUDAQ_CLIENT_REMOTE_CAPABILITY_OVERRIDE", false))
return RemoteCapabilities(/*initValues=*/true);
// Else determine capabilities based on server deployment info.
RemoteCapabilities capabilities(/*initValues=*/false);
if (!m_availableFuncs.contains(m_functionId)) {
// The user has manually overridden an NVQC function selection, but it
// wasn't found in m_availableFuncs.
cudaq::info(
"Function id overriden ({}) but cannot retrieve its remote "
"capabilities because a deployment for it was not found. Will assume "
"all optional remote capabilities are unsupported. You can set "
"CUDAQ_CLIENT_REMOTE_CAPABILITY_OVERRIDE=1 if you wish to override "
"this.",
m_functionId);
return capabilities;
}
const auto &funcEnv = m_availableFuncs.at(m_functionId);
capabilities.serializedCodeExec = funcEnv.hasSerializedCodeExec > 0;
capabilities.stateOverlap =
funcEnv.majorVersion > 1 ||
(funcEnv.majorVersion >= 1 && funcEnv.minorVersion >= 1);
capabilities.vqe = funcEnv.majorVersion > 1 ||
(funcEnv.majorVersion >= 1 && funcEnv.minorVersion >= 1);
capabilities.isRemoteSimulator = true;
return capabilities;
}
virtual bool
sendRequest(mlir::MLIRContext &mlirContext,
cudaq::ExecutionContext &io_context,
cudaq::SerializedCodeExecutionContext *serializedCodeContext,
cudaq::gradient *vqe_gradient, cudaq::optimizer *vqe_optimizer,
const int vqe_n_params, const std::string &backendSimName,
const std::string &kernelName, void (*kernelFunc)(void *),
const void *kernelArgs, std::uint64_t argsSize,
std::string *optionalErrorMsg,
const std::vector<void *> *rawArgs) override {
if (isDisallowed(io_context.name))
throw std::runtime_error(
io_context.name +
" operation is not supported with cudaq target nvqc!");
static const std::vector<std::string> MULTI_GPU_BACKENDS = {
"tensornet", "nvidia-mgpu", "nvidia-mqpu"};
{
// Print out a message if users request a multi-GPU deployment while
// setting the backend to a single-GPU one. Only print once in case this
// is a execution loop.
static bool printOnce = false;
if (m_availableFuncs[m_functionId].numGpus > 1 &&
std::find(MULTI_GPU_BACKENDS.begin(), MULTI_GPU_BACKENDS.end(),
backendSimName) == MULTI_GPU_BACKENDS.end() &&
!printOnce) {
std::cout << "The requested backend simulator (" << backendSimName
<< ") is not capable of using all "
<< m_availableFuncs[m_functionId].numGpus
<< " GPUs requested.\n";
std::cout << "Only one GPU will be used for simulation.\n";
std::cout << "Please refer to CUDA-Q documentation for a list of "
"multi-GPU capable simulator backends.\n";
printOnce = true;
}
}
// Construct the base `cudaq-qpud` request payload.
cudaq::RestRequest request = [&]() {
if (vqe_n_params > 0)
return constructVQEJobRequest(mlirContext, io_context, backendSimName,
kernelName, kernelArgs, vqe_gradient,
*vqe_optimizer, vqe_n_params, rawArgs);
return constructJobRequest(mlirContext, io_context, serializedCodeContext,
backendSimName, kernelName, kernelFunc,
kernelArgs, argsSize, rawArgs);
}();
if (request.code.empty() && (serializedCodeContext == nullptr ||
serializedCodeContext->source_code.empty())) {
if (optionalErrorMsg)
*optionalErrorMsg =
std::string(
"Failed to construct/retrieve kernel IR for kernel named ") +
kernelName;
return false;
}
if (request.format != cudaq::CodeFormat::MLIR &&
serializedCodeContext == nullptr) {
// The `.config` file may have been tampered with.
std::cerr << "Internal error: unsupported kernel IR detected.\nThis may "
"indicate a corrupted CUDA-Q installation.";
std::abort();
}
// Max message size that we can send in the body
constexpr std::size_t MAX_SIZE_BYTES = 250000; // 250 KB
json requestJson;
auto jobHeader = getHeaders();
std::optional<std::string> assetId;
// Make sure that we delete the asset that we've uploaded when this
// `sendRequest` function exits (success or not).
ScopeExit deleteAssetOnExit([&]() {
if (assetId.has_value()) {
cudaq::info("Deleting NVQC Asset Id {}", assetId.value());