Skip to content

Commit

Permalink
Merge pull request #1609 from nvjax-svc-0/patch/fix-default-vocab
Browse files Browse the repository at this point in the history
[NVIDIA] Fix default output features
  • Loading branch information
adarob authored Jan 17, 2025
2 parents 5f03619 + 9cc0d6f commit a186770
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,16 @@

 DEFAULT_OUTPUT_FEATURES = {
     "inputs": seqio.Feature(
-        vocabulary=t5.data.get_default_vocabulary(), add_eos=True,
+        vocabulary=seqio.SentencePieceVocabulary(
+            sentencepiece_model_file="gs://t5-data/vocabs/cc_all.32000.100extra/sentencepiece.model",
+        ),
+        add_eos=True,
         required=False),
     "targets": seqio.Feature(
-        vocabulary=t5.data.get_default_vocabulary(), add_eos=True)
+        vocabulary=seqio.SentencePieceVocabulary(
+            sentencepiece_model_file="gs://t5-data/vocabs/cc_all.32000.100extra/sentencepiece.model",
+        ),
+        add_eos=True)
 }

# ================================ Wikipedia ===================================
Expand Down
17 changes: 13 additions & 4 deletions t5x/contrib/gpu/scripts_gpu/seqio_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,20 @@

 DEFAULT_OUTPUT_FEATURES = {
     "inputs":
-        seqio.Feature(vocabulary=t5.data.get_default_vocabulary(),
-                      add_eos=True,
-                      required=False),
+        seqio.Feature(
+            vocabulary=seqio.SentencePieceVocabulary(
+                sentencepiece_model_file="gs://t5-data/vocabs/cc_all.32000.100extra/sentencepiece.model",
+            ),
+            add_eos=True,
+            required=False
+        ),
     "targets":
-        seqio.Feature(vocabulary=t5.data.get_default_vocabulary(), add_eos=True)
+        seqio.Feature(
+            vocabulary=seqio.SentencePieceVocabulary(
+                sentencepiece_model_file="gs://t5-data/vocabs/cc_all.32000.100extra/sentencepiece.model",
+            ),
+            add_eos=True
+        )
 }

# ================================== The Pile ====================================
Expand Down

0 comments on commit a186770

Please sign in to comment.