@@ -1440,6 +1440,147 @@ async def test_execute_redis_batch_rate_limiter_script_cluster_compatibility():
14401440 assert len (unique_processed_keys ) >= 2 , "Should have processed at least some keys"
14411441
14421442
@pytest.mark.asyncio
async def test_multiple_rate_limits_per_descriptor():
    """
    Verify the descriptor_key-based status lookup when a single descriptor
    yields several rate-limit statuses.

    Scenario exercised:
      * one api_key descriptor carries three rate-limit types
        (requests, tokens, max_parallel_requests);
      * should_rate_limit therefore returns more statuses than descriptors,
        which broke the old floor(i / 2) index mapping with an IndexError;
      * the descriptor_key-based lookup must instead pick the OVER_LIMIT
        status and raise a well-formed HTTPException.
    """
    raw_key = "sk-12345"
    hashed_key = hash_token(raw_key)

    # Key configured with all three limit types so a single descriptor
    # produces multiple statuses.
    user_api_key_dict = UserAPIKeyAuth(
        api_key=hashed_key,
        rpm_limit=2,               # requests limit
        tpm_limit=10,              # tokens limit
        max_parallel_requests=1,   # parallel-requests limit
    )

    local_cache = DualCache()
    handler = _PROXY_MaxParallelRequestsHandler(
        internal_usage_cache=InternalUsageCache(local_cache)
    )

    # Stub should_rate_limit: one descriptor in, three statuses out
    # (requests OK, tokens OK, max_parallel_requests OVER_LIMIT).
    async def mock_should_rate_limit(descriptors, **kwargs):
        assert len(descriptors) == 1, "Should have exactly one api_key descriptor"
        assert descriptors[0]["key"] == "api_key", "Descriptor should be for api_key"

        status_rows = [
            ("OK", 2, 1, "requests"),
            ("OK", 10, 8, "tokens"),
            ("OVER_LIMIT", 1, -1, "max_parallel_requests"),
        ]
        return {
            "overall_code": "OVER_LIMIT",
            "statuses": [
                {
                    "code": code,
                    "current_limit": limit,
                    "limit_remaining": remaining,
                    "rate_limit_type": rl_type,
                    "descriptor_key": "api_key",
                }
                for code, limit, remaining, rl_type in status_rows
            ],
        }

    handler.should_rate_limit = mock_should_rate_limit

    # The pre-call hook must raise HTTPException (429) — never IndexError.
    with pytest.raises(HTTPException) as exc_info:
        await handler.async_pre_call_hook(
            user_api_key_dict=user_api_key_dict,
            cache=local_cache,
            data={"model": "gpt-3.5-turbo"},
            call_type="",
        )

    # Exception detail reflects the OVER_LIMIT status located via descriptor_key.
    assert exc_info.value.status_code == 429
    detail = exc_info.value.detail
    assert "Rate limit exceeded for api_key:" in detail
    assert "max_parallel_requests" in detail
    assert "Current limit: 1" in detail
    assert "Remaining: 0" in detail  # max(0, -1) = 0

    # Rate-limit headers accompany the exception.
    headers = exc_info.value.headers
    assert headers.get("rate_limit_type") == "max_parallel_requests"
    assert "retry-after" in headers
    assert "reset_at" in headers
1528+
@pytest.mark.asyncio
async def test_missing_descriptor_fallback():
    """
    Verify graceful fallback when a status carries a descriptor_key that
    matches none of the submitted descriptors.

    This mismatch should not occur in normal operation, but the handler
    must stay robust: it still raises a 429 HTTPException, substituting
    "unknown" for the missing descriptor value in the detail message.
    """
    raw_key = "sk-12345"

    user_api_key_dict = UserAPIKeyAuth(
        api_key=hash_token(raw_key),
        rpm_limit=2,
    )

    local_cache = DualCache()
    handler = _PROXY_MaxParallelRequestsHandler(
        internal_usage_cache=InternalUsageCache(local_cache)
    )

    # Stub should_rate_limit with a deliberately mismatched descriptor_key
    # to drive the fallback path.
    async def mock_should_rate_limit(descriptors, **kwargs):
        over_limit_status = {
            "code": "OVER_LIMIT",
            "current_limit": 2,
            "limit_remaining": -1,
            "rate_limit_type": "requests",
            "descriptor_key": "nonexistent_key",  # matches no descriptor
        }
        return {"overall_code": "OVER_LIMIT", "statuses": [over_limit_status]}

    handler.should_rate_limit = mock_should_rate_limit

    # The missing descriptor must be handled gracefully — still a clean 429.
    with pytest.raises(HTTPException) as exc_info:
        await handler.async_pre_call_hook(
            user_api_key_dict=user_api_key_dict,
            cache=local_cache,
            data={"model": "gpt-3.5-turbo"},
            call_type="",
        )

    # Detail message falls back to the raw key name with an "unknown" value.
    assert exc_info.value.status_code == 429
    detail = exc_info.value.detail
    assert "Rate limit exceeded for nonexistent_key: unknown" in detail
    assert "requests" in detail
    assert "Current limit: 2" in detail
1583+
14431584@pytest .mark .asyncio
14441585async def test_execute_token_increment_script_cluster_compatibility ():
14451586 """
0 commit comments