|
| 1 | +[2024-11-18 17:23:54,543 systems.py:197 INFO] Found unknown device in GPU connection topology: NIC0. Skipping. |
| 2 | +[2024-11-18 17:23:54,621 main.py:229 INFO] Detected system ID: KnownSystem.sc1 |
| 3 | +[2024-11-18 17:23:56,362 generate_conf_files.py:107 INFO] Generated measurements/ entries for sc1_TRT/stable-diffusion-xl/Offline |
| 4 | +[2024-11-18 17:23:56,363 __init__.py:46 INFO] Running command: python3 -m code.stable-diffusion-xl.tensorrt.harness --logfile_outdir="/home/lry/CM/repos/local/cache/6c0ba4746fa74e77/test_results/mlperf_inference_lry_40-nvidia_original-gpu-tensorrt-vdefault-scc24-main/stable-diffusion-xl/offline/accuracy" --logfile_prefix="mlperf_log_" --performance_sample_count=5000 --test_mode="AccuracyOnly" --gpu_batch_size=8 --mlperf_conf_path="/home/lry/CM/repos/local/cache/3e2d12440d5a4a93/inference/mlperf.conf" --tensor_path="build/preprocessed_data/coco2014-tokenized-sdxl/5k_dataset_final/" --use_graphs=true --user_conf_path="/home/lry/CM/repos/mlcommons@cm4mlops/script/generate-mlperf-inference-user-conf/tmp/593e2d3b46e640629d0ab5c1e4ff088a.conf" --gpu_inference_streams=1 --gpu_copy_streams=1 --gpu_engines="./build/engines/sc1/stable-diffusion-xl/Offline/stable-diffusion-xl-CLIP-Offline-gpu-b8-fp16.custom_k_99_MaxP.plan,./build/engines/sc1/stable-diffusion-xl/Offline/stable-diffusion-xl-CLIPWithProj-Offline-gpu-b8-fp16.custom_k_99_MaxP.plan,./build/engines/sc1/stable-diffusion-xl/Offline/stable-diffusion-xl-UNetXL-Offline-gpu-b8-int8.custom_k_99_MaxP.plan,./build/engines/sc1/stable-diffusion-xl/Offline/stable-diffusion-xl-VAE-Offline-gpu-b8-fp32.custom_k_99_MaxP.plan" --scenario Offline --model stable-diffusion-xl |
| 5 | +[2024-11-18 17:23:56,363 __init__.py:53 INFO] Overriding Environment |
| 6 | +[2024-11-18 17:23:58,943 systems.py:197 INFO] Found unknown device in GPU connection topology: NIC0. Skipping. |
| 7 | +2024-11-18 17:24:00,676 INFO worker.py:1567 -- Connecting to existing Ray cluster at address: 10.0.0.1:6379... |
| 8 | +2024-11-18 17:24:00,683 INFO worker.py:1743 -- Connected to Ray cluster. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m |
| 9 | +[2024-11-18 17:24:00,875 harness.py:207 INFO] Start Warm Up! |
| 10 | +[36m(SDXLCore pid=107737)[0m [2024-11-18 17:24:03,968 backend.py:428 INFO] initialized |
| 11 | +[36m(SDXLCore pid=107737)[0m [2024-11-18 17:24:04,150 backend.py:72 INFO] Loading TensorRT engine: ./build/engines/sc1/stable-diffusion-xl/Offline/stable-diffusion-xl-CLIP-Offline-gpu-b8-fp16.custom_k_99_MaxP.plan. |
| 12 | +[36m(SDXLCore pid=107737)[0m [2024-11-18 17:24:04,323 backend.py:72 INFO] Loading TensorRT engine: ./build/engines/sc1/stable-diffusion-xl/Offline/stable-diffusion-xl-CLIPWithProj-Offline-gpu-b8-fp16.custom_k_99_MaxP.plan. |
| 13 | +[36m(SDXLCore pid=107737)[0m [2024-11-18 17:24:07,326 backend.py:97 INFO] Enabling cuda graphs for unet |
| 14 | +[36m(SDXLCore pid=107737)[0m [2024-11-18 17:24:07,767 backend.py:155 INFO] captured graph for BS=1 |
| 15 | +[36m(SDXLCore pid=107737)[0m [2024-11-18 17:24:08,559 backend.py:155 INFO] captured graph for BS=2 |
| 16 | +[36m(SDXLCore pid=9090, ip=10.0.0.3)[0m [2024-11-18 17:24:01,717 backend.py:428 INFO] initialized[32m [repeated 8x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/ray-logging.html#log-deduplication for more options.)[0m |
| 17 | +[36m(SDXLCore pid=107744)[0m [2024-11-18 17:24:06,876 backend.py:72 INFO] Loading TensorRT engine: ./build/engines/sc1/stable-diffusion-xl/Offline/stable-diffusion-xl-VAE-Offline-gpu-b8-fp32.custom_k_99_MaxP.plan.[32m [repeated 34x across cluster][0m |
| 18 | +[36m(SDXLCore pid=107744)[0m [2024-11-18 17:24:08,645 backend.py:97 INFO] Enabling cuda graphs for unet[32m [repeated 8x across cluster][0m |
| 19 | +[36m(SDXLCore pid=9089, ip=10.0.0.3)[0m [2024-11-18 17:24:09,783 backend.py:155 INFO] captured graph for BS=7[32m [repeated 51x across cluster][0m |
| 20 | +[2024-11-18 17:24:33,625 harness.py:209 INFO] Warm Up Done! |
| 21 | +[2024-11-18 17:24:33,625 harness.py:211 INFO] Start Test! |
| 22 | +[2024-11-18 17:24:33,731 backend.py:852 INFO] 500 |
| 23 | +[36m(SDXLCore pid=107737)[0m [2024-11-18 17:24:33,733 backend.py:630 INFO] generate_images |
| 24 | +[36m(SDXLCore pid=107741)[0m [2024-11-18 17:24:14,698 backend.py:155 INFO] captured graph for BS=8[32m [repeated 19x across cluster][0m |
| 25 | +[36m(SDXLCore pid=9088, ip=10.0.0.3)[0m [2024-11-18 17:24:38,439 backend.py:630 INFO] generate_images[32m [repeated 9x across cluster][0m |
| 26 | +[36m(SDXLCore pid=9088, ip=10.0.0.3)[0m [2024-11-18 17:24:46,259 backend.py:630 INFO] generate_images[32m [repeated 9x across cluster][0m |
| 27 | +[36m(SDXLCore pid=9089, ip=10.0.0.3)[0m [2024-11-18 17:24:54,006 backend.py:630 INFO] generate_images[32m [repeated 9x across cluster][0m |
| 28 | +[36m(SDXLCore pid=107737)[0m [2024-11-18 17:25:02,027 backend.py:630 INFO] generate_images[32m [repeated 8x across cluster][0m |
| 29 | +[36m(SDXLCore pid=107738)[0m [2024-11-18 17:25:10,624 backend.py:630 INFO] generate_images[32m [repeated 5x across cluster][0m |
| 30 | +[36m(SDXLCore pid=107738)[0m [2024-11-18 17:25:19,815 backend.py:630 INFO] generate_images[32m [repeated 9x across cluster][0m |
| 31 | +[36m(SDXLCore pid=107738)[0m [2024-11-18 17:25:29,041 backend.py:630 INFO] generate_images[32m [repeated 9x across cluster][0m |
| 32 | +[2024-11-18 17:25:40,770 backend.py:901 INFO] [Server] Received 500 total samples |
| 33 | +[2024-11-18 17:25:40,773 backend.py:911 INFO] [Device 0] Reported 56 samples |
| 34 | +[2024-11-18 17:25:40,774 backend.py:911 INFO] [Device 1] Reported 56 samples |
| 35 | +[2024-11-18 17:25:40,775 backend.py:911 INFO] [Device 2] Reported 56 samples |
| 36 | +[2024-11-18 17:25:40,777 backend.py:911 INFO] [Device 3] Reported 56 samples |
| 37 | +[2024-11-18 17:25:40,778 backend.py:911 INFO] [Device 4] Reported 56 samples |
| 38 | +[2024-11-18 17:25:40,780 backend.py:911 INFO] [Device 5] Reported 55 samples |
| 39 | +[2024-11-18 17:25:40,782 backend.py:911 INFO] [Device 6] Reported 55 samples |
| 40 | +[2024-11-18 17:25:40,783 backend.py:911 INFO] [Device 7] Reported 55 samples |
| 41 | +[2024-11-18 17:25:40,784 backend.py:911 INFO] [Device 8] Reported 55 samples |
| 42 | +[2024-11-18 17:25:40,784 harness.py:214 INFO] Test Done! |
| 43 | +[2024-11-18 17:25:40,784 harness.py:216 INFO] Destroying SUT... |
| 44 | +[2024-11-18 17:25:40,784 harness.py:219 INFO] Destroying QSL... |
| 45 | +[36m(SDXLCore pid=107737)[0m [2024-11-18 17:25:30,624 backend.py:630 INFO] generate_images[32m [repeated 4x across cluster][0m |
| 46 | +benchmark : Benchmark.SDXL |
| 47 | +buffer_manager_thread_count : 0 |
| 48 | +data_dir : /home/lry/CM/repos/local/cache/d2b9079c1073417b/data |
| 49 | +gpu_batch_size : 8 |
| 50 | +gpu_copy_streams : 1 |
| 51 | +gpu_inference_streams : 1 |
| 52 | +input_dtype : int32 |
| 53 | +input_format : linear |
| 54 | +log_dir : /home/lry/CM/repos/local/cache/3443882dd9374096/repo/closed/NVIDIA/build/logs/2024.11.18-17.23.50 |
| 55 | +mlperf_conf_path : /home/lry/CM/repos/local/cache/3e2d12440d5a4a93/inference/mlperf.conf |
| 56 | +model_path : /home/lry/CM/repos/local/cache/d2b9079c1073417b/models/SDXL/ |
| 57 | +offline_expected_qps : 0.0 |
| 58 | +precision : int8 |
| 59 | +preprocessed_data_dir : /home/lry/CM/repos/local/cache/d2b9079c1073417b/preprocessed_data |
| 60 | +scenario : Scenario.Offline |
| 61 | +system : SystemConfiguration(host_cpu_conf=CPUConfiguration(layout={CPU(name='AMD EPYC 9684X 96-Core Processor', architecture=<CPUArchitecture.x86_64: AliasedName(name='x86_64', aliases=(), patterns=())>, core_count=96, threads_per_core=2): 2}), host_mem_conf=MemoryConfiguration(host_memory_capacity=Memory(quantity=791.59486, byte_suffix=<ByteSuffix.GB: (1000, 3)>, _num_bytes=791594860000), comparison_tolerance=0.05), accelerator_conf=AcceleratorConfiguration(layout=defaultdict(<class 'int'>, {GPU(name='NVIDIA H100 80GB HBM3', accelerator_type=<AcceleratorType.Discrete: AliasedName(name='Discrete', aliases=(), patterns=())>, vram=Memory(quantity=79.6474609375, byte_suffix=<ByteSuffix.GiB: (1024, 3)>, _num_bytes=85520809984), max_power_limit=700.0, pci_id='0x233010DE', compute_sm=90): 5})), numa_conf=NUMAConfiguration(numa_nodes={}, num_numa_nodes=2), system_id='sc1') |
| 62 | +tensor_path : build/preprocessed_data/coco2014-tokenized-sdxl/5k_dataset_final/ |
| 63 | +test_mode : AccuracyOnly |
| 64 | +use_graphs : True |
| 65 | +user_conf_path : /home/lry/CM/repos/mlcommons@cm4mlops/script/generate-mlperf-inference-user-conf/tmp/593e2d3b46e640629d0ab5c1e4ff088a.conf |
| 66 | +system_id : sc1 |
| 67 | +config_name : sc1_stable-diffusion-xl_Offline |
| 68 | +workload_setting : WorkloadSetting(HarnessType.Custom, AccuracyTarget.k_99, PowerSetting.MaxP) |
| 69 | +optimization_level : plugin-enabled |
| 70 | +num_profiles : 1 |
| 71 | +config_ver : custom_k_99_MaxP |
| 72 | +accuracy_level : 99% |
| 73 | +inference_server : custom |
| 74 | +skip_file_checks : False |
| 75 | +power_limit : None |
| 76 | +cpu_freq : None |
| 77 | +[36m(SDXLCore pid=107737)[0m [I] Loading bytes from ./build/engines/sc1/stable-diffusion-xl/Offline/stable-diffusion-xl-CLIP-Offline-gpu-b8-fp16.custom_k_99_MaxP.plan |
| 78 | +[36m(SDXLCore pid=107737)[0m [I] Loading bytes from ./build/engines/sc1/stable-diffusion-xl/Offline/stable-diffusion-xl-CLIPWithProj-Offline-gpu-b8-fp16.custom_k_99_MaxP.plan |
| 79 | +[36m(SDXLCore pid=107744)[0m [I] Loading bytes from ./build/engines/sc1/stable-diffusion-xl/Offline/stable-diffusion-xl-VAE-Offline-gpu-b8-fp32.custom_k_99_MaxP.plan[32m [repeated 34x across cluster][0m |
| 80 | +[2024-11-18 17:25:42,171 run_harness.py:166 INFO] Result: Accuracy run detected. |
| 81 | + |
| 82 | +======================== Result summaries: ======================== |
| 83 | + |
0 commit comments