Skip to content

Inference regions #338

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion lib/ruby_llm/providers/bedrock/models.rb
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,17 @@ def model_id_with_region(model_id, model_data)
return model_id unless model_data['inferenceTypesSupported']&.include?('INFERENCE_PROFILE')
return model_id if model_data['inferenceTypesSupported']&.include?('ON_DEMAND')

"us.#{model_id}"
region_prefix = inference_profile_region_prefix
"#{region_prefix}.#{model_id}"
end

# Derives the geography prefix used to build a cross-region inference profile ID
# (e.g. "eu.anthropic.claude-...") from the configured Bedrock region.
#
# A plain "first two characters" rule is not enough: AWS names the Asia Pacific
# profiles "apac" (not "ap") and the GovCloud profiles "us-gov" (not "us"), so
# those two region families are mapped explicitly. Every other region keeps the
# two-character prefix ("eu-west-3" -> "eu", "us-east-1" -> "us").
#
# @return [String] the profile prefix; "us" when no region is configured
def inference_profile_region_prefix
  # Normalise so stray whitespace or upper-case input cannot leak into the model ID.
  region = @config.bedrock_region.to_s.strip.downcase
  return 'us' if region.empty? # Default fallback when the region is nil/blank

  if region.start_with?('us-gov-')
    'us-gov'
  elsif region.start_with?('ap-')
    'apac'
  else
    # Remaining regions: the two-letter geography code is the prefix.
    region[0, 2]
  end
end
end
end
Expand Down
104 changes: 104 additions & 0 deletions spec/ruby_llm/providers/bedrock/models_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -101,4 +101,108 @@
end
end
end

# New specs for region-aware inference profile handling
describe '#model_id_with_region with region awareness' do
  # Builds a Bedrock provider with the configured region stubbed to +region+ and
  # extended with the module under test, so the helper can be invoked via #send.
  def provider_for(region)
    allow(RubyLLM.config).to receive(:bedrock_region).and_return(region)
    RubyLLM::Providers::Bedrock.new(RubyLLM.config).tap { |provider| provider.extend(described_class) }
  end

  # Model payload that is only reachable through an inference profile.
  def profile_only_model(model_id)
    {
      'modelId' => model_id,
      'inferenceTypesSupported' => ['INFERENCE_PROFILE']
    }
  end

  let(:provider_instance) { provider_for('eu-west-3') }

  context 'with EU region configured' do
    let(:inference_profile_model) { profile_only_model('anthropic.claude-3-7-sonnet-20250219-v1:0') }
    let(:us_prefixed_model) { profile_only_model('us.anthropic.claude-opus-4-1-20250805-v1:0') }

    it 'adds eu. prefix for inference profile models' do
      result = provider_instance.send(:model_id_with_region,
                                      inference_profile_model['modelId'],
                                      inference_profile_model)
      expect(result).to eq('eu.anthropic.claude-3-7-sonnet-20250219-v1:0')
    end

    it 'replaces us. prefix with eu. prefix' do
      result = provider_instance.send(:model_id_with_region,
                                      us_prefixed_model['modelId'],
                                      us_prefixed_model)
      expect(result).to eq('eu.anthropic.claude-opus-4-1-20250805-v1:0')
    end
  end

  context 'with AP region configured' do
    let(:provider_instance) { provider_for('ap-south-1') }

    # Asia Pacific cross-region profiles are published under "apac.", not "ap.".
    it 'replaces existing region prefix with apac. prefix' do
      model_data = profile_only_model('us.anthropic.claude-opus-4-1-20250805-v1:0')

      result = provider_instance.send(:model_id_with_region,
                                      model_data['modelId'],
                                      model_data)
      expect(result).to eq('apac.anthropic.claude-opus-4-1-20250805-v1:0')
    end
  end

  context 'region prefix edge cases' do
    it 'handles empty region gracefully' do
      provider = provider_for('')
      model_data = profile_only_model('anthropic.claude-opus-4-1-20250805-v1:0')

      result = provider.send(:model_id_with_region,
                             model_data['modelId'],
                             model_data)
      expect(result).to eq('us.anthropic.claude-opus-4-1-20250805-v1:0')
    end

    it 'extracts region prefix from various AWS regions' do
      # "ap-*" and "us-gov-*" are the families whose profile prefix is not simply
      # the first two characters of the region name.
      regions_and_expected_prefixes = {
        'eu-west-3' => 'eu',
        'ap-south-1' => 'apac',
        'us-gov-west-1' => 'us-gov',
        'ca-central-1' => 'ca',
        'sa-east-1' => 'sa'
      }

      regions_and_expected_prefixes.each do |region, expected_prefix|
        provider = provider_for(region)
        model_data = profile_only_model('anthropic.claude-opus-4-1-20250805-v1:0')

        result = provider.send(:model_id_with_region,
                               model_data['modelId'],
                               model_data)
        expect(result).to eq("#{expected_prefix}.anthropic.claude-opus-4-1-20250805-v1:0")
      end
    end
  end
end
end