
Commit aa5c195

Yijia J committed

testpr

Change to ubuntu-latest since ubuntu-20.04 is deprecated.

Move prefix cache from MaxText (#239)

Retry grpc async request (#240)
The exception raised by the asyncio task was not properly caught. If the server is not ready, the benchmark serving blocks forever without notice. Retry the connection to the server.

Adding PyTests in JetStream unit test workflow for code coverage. (#242)

Supporting Multi-LoRA inferencing via JetStream server (#221)
Supports Multi-LoRA inferencing via the JetStream server, following the [LLM Inference gateway API protocols](https://github.com/kubernetes-sigs/gateway-api-inference-extension/tree/main/docs/proposals/003-model-server-protocol#inference-api-protocol):
- Implemented an adapter_tensorstore to load, store, manage, and unload the adapter weights.
- Added and exposed the [required metrics](https://github.com/kubernetes-sigs/gateway-api-inference-extension/tree/main/docs/proposals/003-model-server-protocol#metrics-reporting) at the Prometheus endpoint.
- Added a multi_lora_decoding service with the corresponding APIs, per the [requirement](https://github.com/kubernetes-sigs/gateway-api-inference-extension/tree/main/docs/proposals/003-model-server-protocol#inference-api-protocol).
- Implemented single-LoRA functionality support.
1 parent a2b3333 commit aa5c195
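
The adapter_tensorstore mentioned in the commit message is not part of the diffs shown below. For orientation only, here is a minimal sketch of what an in-memory store for LoRA adapter weights could look like; the class name, method names, and capacity-based eviction are hypothetical and do not mirror the actual JetStream implementation added in #221.

```python
# Hypothetical sketch only; names and the eviction policy are illustrative and
# are not the adapter_tensorstore API introduced by #221.
import threading
from typing import Any, Dict


class SimpleAdapterStore:
  """Keeps LoRA adapter weights in memory, keyed by adapter id."""

  def __init__(self, capacity: int = 4):
    self._capacity = capacity  # Max adapters resident at once.
    self._adapters: Dict[str, Any] = {}  # adapter_id -> weights (e.g. a pytree).
    self._lock = threading.Lock()

  def load(self, adapter_id: str, weights: Any) -> None:
    """Registers (or refreshes) an adapter, evicting one if at capacity."""
    with self._lock:
      if (adapter_id not in self._adapters
          and len(self._adapters) >= self._capacity):
        self._adapters.pop(next(iter(self._adapters)))  # Naive eviction.
      self._adapters[adapter_id] = weights

  def get(self, adapter_id: str) -> Any:
    """Returns the weights for adapter_id; raises KeyError if not loaded."""
    with self._lock:
      return self._adapters[adapter_id]

  def unload(self, adapter_id: str) -> None:
    """Drops an adapter's weights if present."""
    with self._lock:
      self._adapters.pop(adapter_id, None)
```

A production store would likely also track loading status and a memory budget so adapters can be swapped without blocking decode requests; those concerns are omitted here.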

27 files changed: +5463 −96 lines

.github/workflows/add_label.yaml
Lines changed: 1 addition & 0 deletions

@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# Test

 name: Add Label

.github/workflows/release.yaml
Lines changed: 2 additions & 2 deletions

@@ -33,7 +33,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-20.04]
+        os: [ubuntu-24.04]
         python-version: ['3.10']
     steps:
       - name: Checkout
@@ -59,7 +59,7 @@ jobs:
     needs: release
     strategy:
       matrix:
-        os: [ubuntu-20.04]
+        os: [ubuntu-24.04]
         python-version: ['3.10']
     environment:
       name: pypi

.github/workflows/unit_tests.yaml
Lines changed: 5 additions & 3 deletions

@@ -31,7 +31,7 @@ jobs:
     name: "Python type/lint/format checks"
     strategy:
       matrix:
-        os: [ubuntu-20.04]
+        os: [ubuntu-24.04]
         python-version: ['3.10']
     runs-on: ${{ matrix.os }}
     steps:
@@ -54,7 +54,7 @@ jobs:
     name: "JetStream unit tests"
     strategy:
       matrix:
-        os: [ubuntu-20.04]
+        os: [ubuntu-24.04]
         python-version: ['3.10']
     runs-on: ${{ matrix.os }}
     steps:
@@ -68,5 +68,7 @@
         run: make install-deps
       - name: Run all unit tests in JetStream (jetstream/tests)
         run: make unit-tests
+      - name: Run all py tests in JetStream (jetstream/tests)
+        run: make py-tests
       - name: Create test coverage report
-        run: make check-test-coverage
+        run: make check-test-coverage

README.md
Lines changed: 3 additions & 0 deletions

@@ -65,6 +65,9 @@ python -m unittest -v jetstream.tests.core.test_orchestrator
 # Test JetStream core server library
 python -m unittest -v jetstream.tests.core.test_server

+# Test JetStream lora adapter tensorstore
+python -m unittest -v jetstream.tests.core.lora.test_adapter_tensorstore
+
 # Test mock JetStream engine implementation
 python -m unittest -v jetstream.tests.engine.test_mock_engine

benchmarks/benchmark_serving.py
Lines changed: 31 additions & 23 deletions

@@ -625,29 +625,37 @@ async def grpc_async_request(
 ) -> tuple[list[int], float, float, float]:
   """Send grpc synchronous request since the current grpc server is sync."""
   options = [("grpc.keepalive_timeout_ms", 10000)]
-  async with grpc.aio.insecure_channel(api_url, options=options) as channel:
-    stub = jetstream_pb2_grpc.OrchestratorStub(channel)
-    request_start_time = time.perf_counter()
-    response = stub.Decode(request)
-    token_list = []
-    ttft = 0
-    ttst = 0
-    stream_resp_cnt = 0
-    async for resp in response:
-      stream_resp_cnt += 1
-      if stream_resp_cnt == 1:
-        await prefill_quota.inc()
-        ttft = time.perf_counter() - request_start_time
-        if ttft > 2.0:
-          print(datetime.now(), f"slow TTFT {ttft:.2f}", prefill_quota.value())
-      elif stream_resp_cnt == 2:
-        ttst = time.perf_counter() - request_start_time
-      resp_tokens = resp.stream_content.samples[0].token_ids
-      token_list.extend(resp_tokens)
-      out_token_cnt.increment(len(resp_tokens))
-    await active_req_quota.inc()
-    req_latency = time.perf_counter() - request_start_time
-    return token_list, ttft, ttst, req_latency
+  # Retry connection while server is not ready.
+  while True:
+    try:
+      async with grpc.aio.insecure_channel(api_url, options=options) as channel:
+        stub = jetstream_pb2_grpc.OrchestratorStub(channel)
+        request_start_time = time.perf_counter()
+        response = stub.Decode(request)
+        token_list = []
+        ttft = 0
+        ttst = 0
+        stream_resp_cnt = 0
+        async for resp in response:
+          stream_resp_cnt += 1
+          if stream_resp_cnt == 1:
+            await prefill_quota.inc()
+            ttft = time.perf_counter() - request_start_time
+            if ttft > 2.0:
+              print(
+                  datetime.now(), f"slow TTFT {ttft:.2f}", prefill_quota.value()
+              )
+          elif stream_resp_cnt == 2:
+            ttst = time.perf_counter() - request_start_time
+          resp_tokens = resp.stream_content.samples[0].token_ids
+          token_list.extend(resp_tokens)
+          out_token_cnt.increment(len(resp_tokens))
+        await active_req_quota.inc()
+        req_latency = time.perf_counter() - request_start_time
+        return token_list, ttft, ttst, req_latency
+    except grpc.aio.AioRpcError as e:
+      print(e)
+      await asyncio.sleep(10)


 async def send_request(
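
A side note on the retry added above: grpc.aio also provides a channel readiness coroutine, so a benchmark client could wait for the server to come up before issuing its first Decode call instead of sleeping a fixed 10 seconds between failed attempts. The sketch below is illustrative only and is not part of this commit; the target address is a placeholder.

```python
# Illustrative alternative, not from this commit: wait for the gRPC channel to
# reach the READY state (with a timeout) before sending any requests.
import asyncio

import grpc


async def wait_until_ready(api_url: str, timeout_s: float = 120.0) -> None:
  """Blocks until the gRPC server at api_url accepts connections."""
  async with grpc.aio.insecure_channel(api_url) as channel:
    # channel_ready() completes once the channel transitions to READY.
    await asyncio.wait_for(channel.channel_ready(), timeout=timeout_s)


if __name__ == "__main__":
  # "localhost:9000" is a placeholder for a locally running JetStream server.
  asyncio.run(wait_until_ready("localhost:9000"))
```

The commit's fixed-interval retry also covers the case where the server accepts connections but still fails early RPCs, so the two approaches are complementary rather than interchangeable.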
