Skip to content

Commit

Permalink
[fix] optimum token selector do sample defaults (#2448)
Browse files (browse the repository at this point in the history)
  • Loading branch information
tosterberg authored Oct 15, 2024
1 parent 57b4f2f commit 94644d1
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 27 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -177,8 +177,9 @@ def make_generations(
slot_request.slot.clear()
return generation, finish_reason

def _generate_token(self, inputs: GenerationInputs,
prefill: Optional[bool]) -> List[Generation]:
def _generate_token(self,
inputs: GenerationInputs,
prefill: Optional[bool] = None) -> List[Generation]:
"""Prepare inputs for batching strategy
Args:
inputs (GenerationInputs): inputs tokenized tensor values
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -141,7 +141,7 @@ def assign(self, request: Request, generation_config: GenerationConfig,
if self._generation_config.do_sample:
self._generation_config.temperature = param.get("temperature", 0.9)
self._generation_config.top_k = param.get("top_k", 0)
self._generation_config.top_p = param.get("top_p", 1.0)
self._generation_config.top_p = param.get("top_p", 0.9)
self._generation_config.typical_p = param.get("typical_p", 1.0)
self.seed = int(param.get("seed", 0))

Expand Down
24 changes: 0 additions & 24 deletions tests/integration/tests.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -727,30 +727,6 @@ def test_gpt2_quantize(self):
r.launch(container='pytorch-inf2-1')
client.run("transformers_neuronx gpt2-quantize".split())

def test_opt_1_3b(self):
with Runner('pytorch-inf2', 'opt-1.3b') as r:
prepare.build_transformers_neuronx_handler_model("opt-1.3b")
r.launch(container='pytorch-inf2-6')
client.run("transformers_neuronx opt-1.3b".split())

def test_gpt_j_6b(self):
with Runner('pytorch-inf2', 'gpt-j-6b') as r:
prepare.build_transformers_neuronx_handler_model("gpt-j-6b")
r.launch(container='pytorch-inf2-6')
client.run("transformers_neuronx gpt-j-6b".split())

def test_pythia(self):
with Runner('pytorch-inf2', 'pythia-2.8b') as r:
prepare.build_transformers_neuronx_handler_model("pythia-2.8b")
r.launch(container='pytorch-inf2-2')
client.run("transformers_neuronx pythia-2.8b".split())

def test_bloom(self):
with Runner('pytorch-inf2', 'bloom-7b1') as r:
prepare.build_transformers_neuronx_handler_model("bloom-7b1")
r.launch(container='pytorch-inf2-2')
client.run("transformers_neuronx bloom-7b1".split())

@pytest.mark.parametrize("model",
["tiny-llama-rb-aot", "tiny-llama-rb-aot-quant"])
def test_partition(self, model):
Expand Down

0 comments on commit 94644d1

Please sign in to comment.