Skip to content

Commit

Permalink
2020/5/11
Browse files Browse the repository at this point in the history
  • Loading branch information
kyzhouhzau committed Jun 9, 2020
1 parent c646ad8 commit b9ecec2
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 30 deletions.
33 changes: 32 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,41 @@ Examples (See tests for more details):
All the above experiments were run on a GTX 1080 GPU with 8000 MiB of memory.

# Status
2020/5/--: rename the project from fennlp to NLPGNN.

2020/5/17: tried to convert sentences to graphs based on the BERT attention matrix, but failed.
This section provides a solution to visualize the BERT attention matrix.
For more detail, see the directory "BERT-GCN".

2020/5/11: add TextGCN and TextSAGE for text classification.

2020/5/5: add GIN, GraphSAGE for graph classification.

2020/4/25: add GAN, GIN model, based on message passing methods.

2020/4/23: add GCN model, based on message passing methods.

2020/4/16: currently focusing on GNN models for NLP, and trying to integrate some GNN models into fennlp.

2020/4/2: add GPT2 model, which can use the parameters released by OpenAI (base, medium, large).
For more detail, see "TG/EN/interactive.py".

2020/3/26: add Bilstm+Attention example for classification

2020/3/23: add RAdam optimizer.

2020/3/19: add test example "albert_ner_train.py" "albert_ner_test.py"

2020/3/16: add a model for training subword embeddings based on BPE methods.
The trained embedding is used in the TextCNN model to improve its performance.
See "tran_bpe_embeding.py" for more details.

2020/3/8: add test example "run_tucker.py" for training TuckER on WN18.

2020/3/3: add test example "tran_text_cnn.py" for training the TextCNN model.

2020/3/2: add test example "train_bert_classification.py" for text classification based on bert.

# Requirement
* tensorflow-gpu>=2.0
* typeguard
Expand Down Expand Up @@ -292,7 +323,7 @@ Same data split and parameters setting as proposed in this [paper](https://arxiv
| ------- | ------- |------- |------- |
|GCN |81.80 |79.50 | 71.20 |
|GAN |83.00 | 79.00 | 72.30 |
|GAAE |82.4 |79.60 | 71.7 |
|GAAE |82.40 |79.60 | 71.70 |

* Graph Classification

Expand Down
2 changes: 1 addition & 1 deletion tests/NER/NER_EN/albert_ner_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from nlpgnn.models import albert

# 载入参数
load_check = LoadCheckpoint(language='en', model="albert", paramaters="base")
load_check = LoadCheckpoint(language='en', model="albert", parameters="base")
param, vocab_file, model_path, spm_model_file = load_check.load_albert_param()
# 定制参数
param.batch_size = 8
Expand Down
37 changes: 23 additions & 14 deletions tests/NER/NER_EN/bert_ner_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from nlpgnn.datas.dataloader import TFWriter, TFLoader
from nlpgnn.metrics import Metric
from nlpgnn.models import bert
from sklearn.metrics import classification_report

# 载入参数
load_check = LoadCheckpoint(language='en')
Expand All @@ -15,6 +16,19 @@
param.maxlen = 100
param.label_size = 9

def ner_evaluation(true_label: list, predicts: list, masks: list):
    """Print a classification report computed over the unmasked positions.

    Every entry of the three lists is flattened to one dimension, and the
    lists are then walked in lockstep. Only positions whose mask value
    equals 1 contribute to the report.

    Args:
        true_label: list of label-id tensors (the caller appends one per
            validation batch).
        predicts: list of predicted label-id tensors, aligned with
            ``true_label``.
        masks: list of input-mask tensors, aligned with ``true_label``.
    """

    def flatten(batches):
        # Collapse each entry to a 1-D numpy array.
        return [tf.reshape(batch, [-1]).numpy() for batch in batches]

    gold_batches = flatten(true_label)
    pred_batches = flatten(predicts)
    mask_batches = flatten(masks)

    gold_tokens = []
    pred_tokens = []
    for gold, pred, mask in zip(gold_batches, pred_batches, mask_batches):
        # Flat indices of the positions where the mask is set.
        keep = np.argwhere(mask == 1).reshape(-1)
        gold_tokens.extend(gold[keep])
        pred_tokens.extend(pred[keep])

    # digits=4: report the metrics with four decimal places.
    print(classification_report(gold_tokens, pred_tokens, digits=4))

class BERT_NER(tf.keras.Model):
def __init__(self, param, **kwargs):
super(BERT_NER, self).__init__(**kwargs)
Expand Down Expand Up @@ -62,22 +76,17 @@ def predict(self, inputs, is_training=False):
checkpoint.restore(tf.train.latest_checkpoint('./save'))
# For test model
Batch = 0
f1s = []
precisions = []
recalls = []
accuracys = []
predicts = []
true_label = []
masks = []
for X, token_type_id, input_mask, Y in ner_load.load_valid():
predict = model.predict([X, token_type_id, input_mask]) # [batch_size, max_length,label_size]
# predict = tf.argmax(output, -1)
f1s.append(f1score(Y, predict))
precisions.append(precsionscore(Y, predict))
recalls.append(recallscore(Y, predict))
accuracys.append(accuarcyscore(Y, predict))

print("f1:{}\tprecision:{}\trecall:{}\taccuracy:{}\n".format(np.mean(f1s),
np.mean(precisions),
np.mean(recalls),
np.mean(accuracys)))
predict = tf.argmax(predict, -1)
predicts.append(predict)
true_label.append(Y)
masks.append(input_mask)
print(writer.label2id())
ner_evaluation(true_label, predicts, masks)
# print("Sentence", writer.convert_id_to_vocab(tf.reshape(X,[-1]).numpy()))
#
# print("Label", writer.convert_id_to_label(tf.reshape(predict,[-1]).numpy()))
37 changes: 23 additions & 14 deletions tests/NER/NER_ZH/bert_ner_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from nlpgnn.datas.dataloader import TFWriter, TFLoader
from nlpgnn.metrics import Metric
from nlpgnn.models import bert
from sklearn.metrics import classification_report

# 载入参数
load_check = LoadCheckpoint(language='zh')
Expand All @@ -15,6 +16,18 @@
param.maxlen = 100
param.label_size = 46

def ner_evaluation(true_label: list, predicts: list, masks: list):
    """Print a classification report computed over the unmasked positions.

    Every entry of the three lists is flattened to one dimension, and the
    lists are then walked in lockstep. Only positions whose mask value
    equals 1 contribute to the report.

    Args:
        true_label: list of label-id tensors (the caller appends one per
            validation batch).
        predicts: list of predicted label-id tensors, aligned with
            ``true_label``.
        masks: list of input-mask tensors, aligned with ``true_label``.
    """

    def flatten(batches):
        # Collapse each entry to a 1-D numpy array.
        return [tf.reshape(batch, [-1]).numpy() for batch in batches]

    gold_batches = flatten(true_label)
    pred_batches = flatten(predicts)
    mask_batches = flatten(masks)

    gold_tokens = []
    pred_tokens = []
    for gold, pred, mask in zip(gold_batches, pred_batches, mask_batches):
        # Flat indices of the positions where the mask is set.
        keep = np.argwhere(mask == 1).reshape(-1)
        gold_tokens.extend(gold[keep])
        pred_tokens.extend(pred[keep])

    # digits=4: report the metrics with four decimal places.
    print(classification_report(gold_tokens, pred_tokens, digits=4))

# 构建模型
class BERT_NER(tf.keras.Model):
Expand Down Expand Up @@ -65,22 +78,18 @@ def predict(self, inputs, is_training=False):
# For test model
# print(dir(checkpoint))
Batch = 0
f1s = []
precisions = []
recalls = []
accuracys = []
predicts = []
true_label = []
masks = []
for X, token_type_id, input_mask, Y in ner_load.load_valid():
predict = model.predict([X, token_type_id, input_mask]) # [batch_size, max_length,label_size]
# predict = tf.argmax(output, -1)
f1s.append(f1score(Y, predict))
precisions.append(precsionscore(Y, predict))
recalls.append(recallscore(Y, predict))
accuracys.append(accuarcyscore(Y, predict))

print("f1:{}\tprecision:{}\trecall:{}\taccuracy:{}\n".format(np.mean(f1s),
np.mean(precisions),
np.mean(recalls),
np.mean(accuracys)))
predict = tf.argmax(predict, -1)
predicts.append(predict)
true_label.append(Y)
masks.append(input_mask)
print(writer.label2id())
ner_evaluation(true_label, predicts, masks)

# print("Sentence", writer.convert_id_to_vocab(tf.reshape(X,[-1]).numpy()))
#
# print("Label", writer.convert_id_to_label(tf.reshape(predict,[-1]).numpy()))

0 comments on commit b9ecec2

Please sign in to comment.