Fix typos (PaddlePaddle#4959)
co63oc authored Feb 23, 2023
1 parent f354fe6 commit 4440538
Showing 23 changed files with 50 additions and 50 deletions.
2 changes: 1 addition & 1 deletion docs/get_started/installation.rst
@@ -49,7 +49,7 @@ Anaconda是一个开源的Python发行版本,其包含了conda、Python等180
按如上方式配置后,即可在环境中使用PaddleNLP了,命令行输入python回车后,import paddlenlp试试吧,之后再次使用都可以通过打开'所有程序->Anaconda3/2(64-bit)->Anaconda Prompt',再执行conda activate my_paddlenlp进入环境后,即可再次使用PaddleNLP。

- 2、Linux/Mac安装安装Anaconda
+ 2、Linux/Mac安装Anaconda
>>>>>>>>>

第一步 下载
2 changes: 1 addition & 1 deletion docs/locale/en/LC_MESSAGES/get_started/installation.po
@@ -83,7 +83,7 @@ msgid ""
msgstr ""

#: ../get_started/installation.rst:53
msgid "2、Linux/Mac安装安装Anaconda"
msgid "2、Linux/Mac安装Anaconda"
msgstr ""

#: ../get_started/installation.rst:57
2 changes: 1 addition & 1 deletion docs/model_zoo/embeddings.md
@@ -23,7 +23,7 @@

## 介绍

- PaddleNLP提供多个开源的预训练词向量模型,用户仅需在使用`paddlenlp.embeddings.TokenEmbedding`时,指定预训练模型的名称,即可加载相对应的预训练模型。以下将介绍`TokenEmbeddign`详细用法,并列出PaddleNLP所支持的预训练Embedding模型。
+ PaddleNLP提供多个开源的预训练词向量模型,用户仅需在使用`paddlenlp.embeddings.TokenEmbedding`时,指定预训练模型的名称,即可加载相对应的预训练模型。以下将介绍`TokenEmbedding`详细用法,并列出PaddleNLP所支持的预训练Embedding模型。

## 用法

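For reference, a minimal sketch of the `TokenEmbedding` usage the corrected sentence describes (the model name below is an assumed example from the PaddleNLP embedding list):

```python
from paddlenlp.embeddings import TokenEmbedding

# Load a pretrained word-vector model by name (the name is an assumed
# example; any model listed in embeddings.md should work).
embedding = TokenEmbedding(embedding_name="w2v.baidu_encyclopedia.target.word-word.dim300")

# Look up vectors for a few tokens.
vectors = embedding.search(["中国", "语言"])
print(vectors.shape)  # (2, 300)
```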
2 changes: 1 addition & 1 deletion examples/text_to_sql/IGSQL/README.md
@@ -80,7 +80,7 @@ python run.py --raw_train_filename="data/sparc_data_removefrom/train.pkl" \
* embedding_filename: GLOVE 词向量文件路径。
* data_directory: 预处理得到的文件夹路径。
* logdir: 输出日志文件夹路径。
- * train,evaluate: 是否执行trian,evaluate。
+ * train,evaluate: 是否执行train,evaluate。


### 训练阶段的输出日志
2 changes: 1 addition & 1 deletion examples/text_to_sql/RAT-SQL/evaluation/README.md
@@ -4,7 +4,7 @@
# 评估
输入文件格式:
1. 文件以.sql结尾
- 2. 文件每行的格式:"qid\tsql_query\tdb_id",其中predcit文件db_id是可选字段,gold文件db_id是必选字段
+ 2. 文件每行的格式:"qid\tsql_query\tdb_id",其中predict文件db_id是可选字段,gold文件db_id是必选字段
3. 评估指标:exact matching score

# 使用
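A minimal sketch of reading the line format this README fixes, with `db_id` optional as described (the helper name and file handling are illustrative assumptions, not part of the evaluation script):

```python
def read_sql_file(path):
    # Each line: qid, sql_query, and (optionally) db_id, separated by tabs.
    items = []
    with open(path, encoding="utf-8") as f:
        for line in f:
            fields = line.rstrip("\n").split("\t")
            qid, sql_query = fields[0], fields[1]
            db_id = fields[2] if len(fields) > 2 else None  # optional in predict files
            items.append((qid, sql_query, db_id))
    return items
```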
@@ -183,7 +183,7 @@ def tokenize(self, question, db, column_match_cells=None, candi_nums=None, candi
if match_cells is not None and len(match_cells) > 0:
if column.dtype in ("text", "time"):
if not self.config.predict_value:
- match_cells = match_cells[:1] # the first cell used to complement senmantics
+ match_cells = match_cells[:1] # the first cell used to complement semantics
for mcell in match_cells:
value_list.append(mcell)
toks = [self.special_token_dict["value"]] + self.tokenizer.tokenize(mcell)
@@ -267,7 +267,7 @@ def add_item(self, section, sql_json, value_list):
self.format_sql_value(sql_json, value_dict)

parsed = self.grammar.parse(sql_json, section)
- self.ast_wrapper.verify_ast(parsed) # will raise AssertionError, if varify failed
+ self.ast_wrapper.verify_ast(parsed) # will raise AssertionError, if verify failed

root = parsed
if section == "train":
6 changes: 3 additions & 3 deletions examples/text_to_sql/RAT-SQL/text2sql/io.py
@@ -32,12 +32,12 @@ def init_ernie_model(model_class, model_dir):
return ernie, config["hidden_size"]


- def save(model, optimzer, save_path):
+ def save(model, optimizer, save_path):
try:
paddle.save(model.state_dict(), save_path + ".pdparams")
- paddle.save(optimzer.state_dict(), save_path + ".pdopt")
+ paddle.save(optimizer.state_dict(), save_path + ".pdopt")
except Exception as e:
logging.error("save model and optimzer failed. save path: %s", save_path)
logging.error("save model and optimizer failed. save path: %s", save_path)
logging.error(traceback.format_exc())


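The fixed `save` helper wraps two `paddle.save` calls; a self-contained sketch of the same pattern (model and paths are illustrative):

```python
import paddle

model = paddle.nn.Linear(4, 2)
optimizer = paddle.optimizer.Adam(parameters=model.parameters())

# One file for model weights, one for optimizer state, as in save() above.
paddle.save(model.state_dict(), "checkpoint.pdparams")
paddle.save(optimizer.state_dict(), "checkpoint.pdopt")
```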
@@ -31,7 +31,7 @@ class Hypothesis4Filtering(Hypothesis):

def beam_search_with_heuristics(model, inputs, beam_size, max_steps, from_cond=True):
"""
- Find the valid FROM clasue with beam search
+ Find the valid FROM clause with beam search
"""
orig_inputs = inputs["orig_inputs"][0]
# inference_state, next_choices = model.inference(inputs, orig_inputs.db)
@@ -93,7 +93,7 @@ def beam_search_with_heuristics(model, inputs, beam_size, max_steps, from_cond=T
prefixes2fill_from.sort(key=operator.attrgetter("score"), reverse=True)
# assert len(prefixes) == beam_size

- # emuerating
+ # enumerating
beam_from = prefixes2fill_from
max_size = 6
unfiltered_finished = []
4 changes: 2 additions & 2 deletions examples/text_to_sql/RAT-SQL/text2sql/optim.py
@@ -21,7 +21,7 @@

import paddle

- param_name_to_exclue_from_weight_decay = re.compile(r".*layer_norm_scale|.*layer_norm_bias|.*b_0")
+ param_name_to_exclude_from_weight_decay = re.compile(r".*layer_norm_scale|.*layer_norm_bias|.*b_0")


def get_warmup_and_linear_decay(max_steps, warmup_steps):
@@ -43,7 +43,7 @@ def init_optimizer(model, config, train_steps, scale_params_lr=None):
lr_scheduler,
parameters=model.parameters(),
weight_decay=config.weight_decay,
- apply_decay_param_fun=lambda n: not param_name_to_exclue_from_weight_decay.match(n),
+ apply_decay_param_fun=lambda n: not param_name_to_exclude_from_weight_decay.match(n),
grad_clip=paddle.nn.ClipGradByGlobalNorm(config.grad_clip),
)
return optimizer
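A quick check of what the renamed regex excludes from weight decay (the parameter names are illustrative of Paddle's naming scheme):

```python
import re

param_name_to_exclude_from_weight_decay = re.compile(r".*layer_norm_scale|.*layer_norm_bias|.*b_0")

# LayerNorm scales/biases and bias parameters (suffix b_0) are excluded;
# weight matrices such as w_0 still receive weight decay.
for name in ["encoder_ffn_layer_norm_scale", "fc.b_0", "fc.w_0"]:
    decays = not param_name_to_exclude_from_weight_decay.match(name)
    print(name, "->", "decay" if decays else "no decay")
```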
@@ -423,11 +423,11 @@ def parse_from(toks, start_idx, tables_with_alias, schema):
assert toks[idx] == ")"
idx += 1
if idx < len_ and toks[idx] == "a":
- assert last_table is not None, "last_table should be a table name strin, not None"
+ assert last_table is not None, "last_table should be a table name string, not None"
tables_with_alias["a"] = last_table
idx += 2
elif idx < len_ and toks[idx] == "b":
- assert last_table is not None, "last_table should be a table name strin, not None"
+ assert last_table is not None, "last_table should be a table name string, not None"
tables_with_alias["b"] = last_table
idx += 1
if idx < len_ and (toks[idx] in CLAUSE_KEYWORDS or toks[idx] in (")", ";")):
@@ -675,7 +675,7 @@ def __eval_from_sql(pred_tables, gold_tables):
# return 1

def eval_exact_match(self, pred, gold):
"""wrapper of evaluate examct match, to process
"""wrapper of evaluate exact match, to process
`SQL1 intersect/union SQL2` vs `SQL2 intersect/union SQL1`
"""
score = self._eval_exact_match(pred, gold)
4 changes: 2 additions & 2 deletions examples/text_to_sql/RAT-SQL/text2sql/utils/nn_utils.py
@@ -53,7 +53,7 @@ def lstm_init(num_layers, hidden_size, *batch_sizes):


def batch_gather_2d(var, indices):
"""Gather slices from var in each batch, according to corrensponding
"""Gather slices from var in each batch, according to corresponding
index in indices. Currently, it only support 2d Tensor.
Args:
@@ -156,7 +156,7 @@ def pad_sequences_for_3d(seqs, max_col, max_num, dtype=np.int64):


def pad_index_sequences(seqs, max_col, max_row, dtype=np.int64):
"""padding squences for column token indexs"""
"""padding sequences for column token indexes"""
padded = []
for query in seqs:
new_cols = []
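A NumPy sketch of the per-batch gather the corrected docstring describes (shapes illustrative; the real helper operates on Paddle tensors):

```python
import numpy as np

def batch_gather_2d_np(var, indices):
    # For each batch b, gather var[b, indices[b, i]] for every position i.
    batch_idx = np.arange(var.shape[0])[:, None]  # (batch_size, 1)
    return var[batch_idx, indices]                # (batch_size, num_indices)

var = np.array([[10, 11, 12],
                [20, 21, 22]])
indices = np.array([[2, 0],
                    [1, 1]])
print(batch_gather_2d_np(var, indices))  # [[12 10], [21 21]]
```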
2 changes: 1 addition & 1 deletion examples/text_to_sql/RAT-SQL/text2sql/utils/text_utils.py
@@ -191,7 +191,7 @@ def search_values(cls, question, table):
all_candidate = []
for col_id in range(len(table.header)):
header = table.header[col_id]
- # 提取col出现在quesiton中的cell
+ # 提取col出现在question中的cell
# TODO 这里存在一个问题,一个text类型cell必须完全在question中出现才会被当做候选cell
value_in_column = cls.extract_values_from_column(question, table, col_id, header.type)
if header.type == "text":
6 changes: 3 additions & 3 deletions examples/text_to_sql/RAT-SQL/text2sql/utils/utils.py
@@ -81,10 +81,10 @@ def count_file_lines(filename):
return cnt


def print_tensors(tag="*", **kwrags):
"""print tensors for debuging"""
def print_tensors(tag="*", **kwargs):
"""print tensors for debugging"""
print(tag * 50)
- for key, value in kwrags.items():
+ for key, value in kwargs.items():
print(key, ":", value)


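Usage of the fixed debugging helper defined above (tensor values illustrative):

```python
import paddle

loss = paddle.to_tensor([0.25])
logits = paddle.zeros([2, 3])
# Prints a separator of 50 tag characters, then each "name : value" pair.
print_tensors(tag="=", loss=loss, logits=logits)
```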
2 changes: 1 addition & 1 deletion examples/word_embedding/train.py
@@ -142,7 +142,7 @@ def forward(self, text, seq_len=None):
# Loads dataset.
train_ds, dev_ds = load_dataset("chnsenticorp", splits=["train", "dev"])

- # Constructs the newtork.
+ # Constructs the network.
model = BoWModel(
vocab_size=len(vocab),
num_classes=len(train_ds.label_list),
14 changes: 7 additions & 7 deletions paddlenlp/ops/einsum.py
@@ -28,7 +28,7 @@ def einsum(equation, *operands):
Uses uncased letters to specify the dimension of the operands and result. The input
equation is on the left hand before `->` while the output equation is on the right side.
Einsum can infer the result shape so that the `->` and the result label letters can be omitted.
- Operands in the input equation are splited by commas (','), e.g. 'abc,cde' describes two 3D
+ Operands in the input equation are splitted by commas (','), e.g. 'abc,cde' describes two 3D
operands. The dimensions labeled with same letter should be same or be 1. Ellipsis ('...') can
be used to specify the broadcast dimensions.
@@ -129,14 +129,14 @@ def _mul_sum(left, right, sum_dims):
is_right_summed_dim = right.shape[i] > 1
if i in sum_dims_set:
if is_left_summed_dim and is_right_summed_dim:
- assert left.shape[i] == right.shape[i], "Non-brocast dim should be equal."
+ assert left.shape[i] == right.shape[i], "Non-broadcast dim should be equal."
summed_size *= left.shape[i]
elif is_left_summed_dim:
left = left.sum(axis=i, keepdim=True)
elif is_right_summed_dim:
right = right.sum(axis=i, keepdim=True)
elif is_left_summed_dim and is_right_summed_dim:
- assert left.shape[i] == right.shape[i], "Non-brocast dim should be equal."
+ assert left.shape[i] == right.shape[i], "Non-broadcast dim should be equal."
batch_dims.append(i)
batch_size *= left.shape[i]
elif is_left_summed_dim:
@@ -204,7 +204,7 @@ def _mul_sum(left, right, sum_dims):
for ch in term:
if ch == ".":
ell_char_count += 1
- assert ell_char_count <= 3, "The '.' should only exist in one ellispis '...' in term {}".format(term)
+ assert ell_char_count <= 3, "The '.' should only exist in one ellipsis '...' in term {}".format(term)
if ell_char_count == 3:
if num_ell_idxes == -1:
num_ell_idxes = curr_num_ell_idxes
@@ -213,7 +213,7 @@ def _mul_sum(left, right, sum_dims):
else:
assert (
curr_num_ell_idxes == num_ell_idxes
), "Ellispis in all terms should represent same dimensions ({}).".format(num_ell_idxes)
), "Ellipsis in all terms should represent same dimensions ({}).".format(num_ell_idxes)

for j in range(num_ell_idxes):
curr_operand_idxes.append(j + first_ell_idx)
@@ -247,11 +247,11 @@ def _mul_sum(left, right, sum_dims):
for ch in output_eqn:
if ch == ".":
ell_char_count += 1
- assert ell_char_count <= 3, "The '.' should only exist in one ellispis '...' in term {}".format(
+ assert ell_char_count <= 3, "The '.' should only exist in one ellipsis '...' in term {}".format(
output_eqn
)
if ell_char_count == 3:
- assert num_ell_idxes > -1, "Input equation '{}' don't have ellispis.".format(input_eqn)
+ assert num_ell_idxes > -1, "Input equation '{}' don't have ellipsis.".format(input_eqn)
for j in range(num_ell_idxes):
idxes_to_output_dims[first_ell_idx + j] = num_output_dims
num_output_dims += 1
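A usage sketch for the conventions the corrected docstring describes, assuming `einsum` is importable from `paddlenlp.ops` as the file path suggests (shapes illustrative):

```python
import paddle
from paddlenlp.ops import einsum

x = paddle.randn([2, 3, 4])
y = paddle.randn([2, 4, 5])

# Operands are separated by commas: 'bij,bjk->bik' is a batched matmul.
z = einsum("bij,bjk->bik", x, y)
print(z.shape)  # [2, 3, 5]

# An ellipsis ('...') broadcasts the leading dimensions.
w = einsum("...ij->...ji", x)
print(w.shape)  # [2, 4, 3]
```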
2 changes: 1 addition & 1 deletion paddlenlp/prompt/prompt_utils.py
@@ -44,7 +44,7 @@ class PromptDataCollatorWithPadding:
pad the inputs to the longest sequence in the batch.
Args:
- tokenizer (`paddlennlp.transformers.PretrainedTokenizer`):
+ tokenizer (`paddlenlp.transformers.PretrainedTokenizer`):
The tokenizer used for encoding the data from PromptTokenizer.
"""

6 changes: 3 additions & 3 deletions paddlenlp/prompt/template.py
@@ -507,7 +507,7 @@ def parse_soft_prompt(self):
`Tuple[Dict[int, int], List[List[int]], int]`:
- Mapping from continuous ids to word ids for initialization.
- Continuous ids for each part. Id 0 denotes none-continuous part.
- - Number of unique coutinuous tokens.
+ - Number of unique continuous tokens.
"""
prompt = self._prompt.copy()
num_soft_token = 1
@@ -560,15 +560,15 @@ def parse_soft_prompt(self):
else:
soft_id_reindex[part["soft_id"]] = soft_id_list

- # Deal with continous prompt defined by `soft_id`.
+ # Deal with continuous prompt defined by `soft_id`.
elif "soft_id" in part and part["soft_id"] in soft_id_reindex:
soft_id_list = soft_id_reindex[part["soft_id"]]
if "length" in part:
logger.warning("Ignore `length` because it is incompatible with existing `soft_id`.")
soft_token_ids.append(soft_id_list)
part_prompt = {"soft": [self.tokenizer.unk_token] * len(soft_id_list)}

- # Deal with continous prompt with random initialization.
+ # Deal with continuous prompt with random initialization.
else:
if "length" not in part:
part["length"] = 1
8 changes: 4 additions & 4 deletions paddlenlp/seq2vec/encoder.py
@@ -260,7 +260,7 @@ def forward(self, inputs, mask=None):
Shape as `(batch_size, num_tokens, emb_dim)` and dtype as `float32` or `float64`.
Tensor containing the features of the input sequence.
mask (Tensor, optional):
- Shape shoule be same as `inputs` and dtype as `int32`, `int64`, `float32` or `float64`.
+ Shape should be same as `inputs` and dtype as `int32`, `int64`, `float32` or `float64`.
Its each elements identify whether the corresponding input token is padding or not.
If True, not padding token. If False, padding token.
Defaults to `None`.
@@ -319,7 +319,7 @@ class GRUEncoder(nn.Layer):
Defaults to 1.
direction (str, optional):
The direction of the network. It can be "forward" and "bidirect"
- (it means bidirection network). If "bidirect", it is a birectional GRU,
+ (it means bidirection network). If "bidirect", it is a bidirectional GRU,
and returns the concat output from both directions.
Defaults to "forward".
dropout (float, optional):
@@ -504,7 +504,7 @@ class LSTMEncoder(nn.Layer):
Defaults to 1.
direction (str, optional):
The direction of the network. It can be "forward" or "bidirect" (it means bidirection network).
If "bidirect", it is a birectional LSTM, and returns the concat output from both directions.
If "bidirect", it is a bidirectional LSTM, and returns the concat output from both directions.
Defaults to "forward".
dropout (float, optional):
If non-zero, introduces a Dropout layer on the outputs of each LSTM layer
@@ -688,7 +688,7 @@ class RNNEncoder(nn.Layer):
Defaults to 1.
direction (str, optional):
The direction of the network. It can be "forward" and "bidirect"
- (it means bidirection network). If "biderect", it is a birectional RNN,
+ (it means bidirection network). If "bidirect", it is a bidirectional RNN,
and returns the concat output from both directions. Defaults to "forward"
dropout (float, optional):
If non-zero, introduces a Dropout layer on the outputs of each RNN layer
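A sketch of the `direction="bidirect"` behavior the corrected docstrings describe (sizes illustrative):

```python
import paddle
from paddlenlp.seq2vec import LSTMEncoder

# Bidirectional LSTM encoder: forward and backward states are concatenated,
# so the output width is hidden_size * 2.
encoder = LSTMEncoder(input_size=128, hidden_size=64, direction="bidirect")
inputs = paddle.randn([4, 10, 128])  # (batch_size, num_tokens, emb_dim)
output = encoder(inputs)
print(output.shape)  # [4, 128]
```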
6 changes: 3 additions & 3 deletions paddlenlp/taskflow/dependency_parsing.py
@@ -163,7 +163,7 @@ def __init__(
else:
raise ValueError(
"The encoding model should be one of \
- ddparser, ddparser-ernie-1.0 and ddoarser-ernie-gram-zh"
+ ddparser, ddparser-ernie-1.0 and ddparser-ernie-gram-zh"
)
self._check_task_files()
self._construct_vocabs()
@@ -528,9 +528,9 @@ def eisner(scores, mask):
s_i = np.full_like(scores, float("-inf"))
# Score for complete span
s_c = np.full_like(scores, float("-inf"))
- # Incompelte span position for backtrack
+ # Incomplete span position for backtrack
p_i = np.zeros((seq_len, seq_len, batch_size), dtype=np.int64)
- # Compelte span position for backtrack
+ # Complete span position for backtrack
p_c = np.zeros((seq_len, seq_len, batch_size), dtype=np.int64)
# Set 0 to s_c.diagonal
s_c = fill_diagonal(s_c, 0)
2 changes: 1 addition & 1 deletion paddlenlp/taskflow/models/dependency_parsing_model.py
@@ -217,7 +217,7 @@ def index_sample(x, index):
arr_index = paddle.arange(start=0, end=len(index), dtype=index.dtype)
arr_index = paddle.unsqueeze(arr_index, axis=[1, 2])
arr_index = paddle.expand(arr_index, index.shape)
- # Genrate new index
+ # Generate new index
new_index = paddle.concat((arr_index, index), -1)
new_index = paddle.reshape(new_index, (-1, 2))
# Get output
8 changes: 4 additions & 4 deletions paddlenlp/taskflow/models/sentiment_analysis_model.py
@@ -34,7 +34,7 @@ class BoWModel(nn.Layer):
padding_idx(int, optional): The padding value in the embedding, the padding_idx of embedding value will
not be updated, the default value is 0.
hidden_size(int, optional): The output size of linear that after the bow, default value is 128.
- fc_hidden_size(int, optional): The output size of linear that after the fisrt linear, default value is 96.
+ fc_hidden_size(int, optional): The output size of linear that after the first linear, default value is 96.
"""

def __init__(self, vocab_size, num_classes, emb_dim=128, padding_idx=0, hidden_size=128, fc_hidden_size=96):
@@ -71,15 +71,15 @@ class LSTMModel(nn.Layer):
which is passed through some feed-forward layers to output a logits (`output_layer`).
Args:
vocab_size(int): The vocab size that used to create the embedding.
- num_class(int): The num clas of the classifier.
+ num_class(int): The num class of the classifier.
emb_dim(int. optional): The size of the embedding, default value is 128.
padding_idx(int, optional): The padding value in the embedding, the padding_idx of embedding value will
not be updated, the default value is 0.
- lstm_hidden_size(int, optional): The output size of the lstm, defalut value 198.
+ lstm_hidden_size(int, optional): The output size of the lstm, default value 198.
direction(string, optional): The direction of lstm, default value is `forward`.
lstm_layers(string, optional): The num of lstm layer.
dropout(float, optional): The dropout rate of lstm.
- pooling_type(float, optional): The pooling type of lstm. Defalut value is None,
+ pooling_type(float, optional): The pooling type of lstm. Default value is None,
if `pooling_type` is None, then the LSTMEncoder will return the hidden state of the last time step at last layer as a single vector.
"""
