2020/5/11

kyzhouhzau · Jun 9, 2020 · b9ecec2 · b9ecec2
1 parent c646ad8
commit b9ecec2
Show file tree

Hide file tree

Showing 4 changed files with 79 additions and 30 deletions.
diff --git a/README.md b/README.md
@@ -40,10 +40,41 @@ Examples (See tests for more details):
 All the above experiments were tested on GTX 1080 GPU with memory 8000MiB.
 
 # Status
+2020/5/--: rename the project from fennlp to NLPGNN.
+
 2020/5/17: try to convert sentence to graph based on bert attention matrix, but failed.
 This section provides a solution to visualize the BERT attention matrix.
 For more details, see the "BERT-GCN" directory.
 
+2020/5/11: add TextGCN and TextSAGE for text classification.
+
+2020/5/5: add GIN, GraphSAGE for graph classification.
+
+2020/4/25: add GAN, GIN model, based on message passing methods.
+
+2020/4/23: add GCN model, based on message passing methods.
+
+2020/4/16: currently focusing on GNN models in NLP, and trying to integrate some GNN models into fennlp.
+
+2020/4/2: add GPT2 model, which can use the parameters released by OpenAI (base, medium, large).
+For more details, see "TG/EN/interactive.py".
+
+2020/3/26: add Bilstm+Attention example for classification
+
+2020/3/23: add RAdam optimizer.
+
+2020/3/19: add test example "albert_ner_train.py" "albert_ner_test.py"
+
+2020/3/16: add a model for training sub-word embeddings based on BPE methods.
+The trained embedding is used in the TextCNN model to improve its performance.
+See "tran_bpe_embeding.py" for more details.
+
+2020/3/8: add test example "run_tucker.py" for training TuckER on WN18.
+
+2020/3/3: add test example "tran_text_cnn.py" for training the TextCNN model.
+
+2020/3/2: add test example "train_bert_classification.py" for text classification based on bert.
+
 # Requirement
 * tensorflow-gpu>=2.0
 * typeguard
@@ -292,7 +323,7 @@ Same data split and parameters setting as proposed in this [paper](https://arxiv
 | ------- | -------  |-------     |-------   |
 |GCN      |81.80   |79.50    |  71.20    |  
 |GAN      |83.00   | 79.00   |  72.30    |
-|GAAE     |82.4   |79.60    |  71.7    |  
+|GAAE     |82.40   |79.60    |  71.70   |  
 
 * Graph Classification
 

diff --git a/tests/NER/NER_EN/albert_ner_test.py b/tests/NER/NER_EN/albert_ner_test.py
@@ -7,7 +7,7 @@
 from nlpgnn.models import albert
 
 # 载入参数
-load_check = LoadCheckpoint(language='en', model="albert", paramaters="base")
+load_check = LoadCheckpoint(language='en', model="albert", parameters="base")
 param, vocab_file, model_path, spm_model_file = load_check.load_albert_param()
 # 定制参数
 param.batch_size = 8

diff --git a/tests/NER/NER_EN/bert_ner_test.py b/tests/NER/NER_EN/bert_ner_test.py
@@ -5,6 +5,7 @@
 from nlpgnn.datas.dataloader import TFWriter, TFLoader
 from nlpgnn.metrics import Metric
 from nlpgnn.models import bert
+from sklearn.metrics import classification_report
 
 # 载入参数
 load_check = LoadCheckpoint(language='en')
@@ -15,6 +16,19 @@
 param.maxlen = 100
 param.label_size = 9
 
+def ner_evaluation(true_label: list, predicts: list, masks: list):  # Print a classification report over the positions whose mask equals 1.
+    all_predict = []  # predicted labels gathered across every item
+    all_true = []  # reference labels gathered across every item
+    true_label = [tf.reshape(item, [-1]).numpy() for item in true_label]  # flatten each item to a 1-D NumPy array
+    predicts = [tf.reshape(item, [-1]).numpy() for item in predicts]  # same flattening, so positions line up with the labels
+    masks = [tf.reshape(item, [-1]).numpy() for item in masks]  # same flattening for the masks
+    for i, j, m in zip(true_label, predicts, masks):
+        index = np.argwhere(m == 1)  # keep only positions where the mask is 1 (presumably non-padding tokens; confirm against the data loader)
+        all_true.extend(i[index].reshape(-1))
+        all_predict.extend(j[index].reshape(-1))
+    report = classification_report(all_true, all_predict, digits=4)  # NOTE: a `labels` parameter can also be supplied here
+    print(report)
+
 class BERT_NER(tf.keras.Model):
     def __init__(self, param, **kwargs):
         super(BERT_NER, self).__init__(**kwargs)
@@ -62,22 +76,17 @@ def predict(self, inputs, is_training=False):
 checkpoint.restore(tf.train.latest_checkpoint('./save'))
 # For test model
 Batch = 0
-f1s = []
-precisions = []
-recalls = []
-accuracys = []
+predicts = []
+true_label = []
+masks = []
 for X, token_type_id, input_mask, Y in ner_load.load_valid():
     predict = model.predict([X, token_type_id, input_mask])  # [batch_size, max_length,label_size]
-    # predict = tf.argmax(output, -1)
-    f1s.append(f1score(Y, predict))
-    precisions.append(precsionscore(Y, predict))
-    recalls.append(recallscore(Y, predict))
-    accuracys.append(accuarcyscore(Y, predict))
-
-print("f1:{}\tprecision:{}\trecall:{}\taccuracy:{}\n".format(np.mean(f1s),
-                                                             np.mean(precisions),
-                                                             np.mean(recalls),
-                                                             np.mean(accuracys)))
+    predict = tf.argmax(predict, -1)
+    predicts.append(predict)
+    true_label.append(Y)
+    masks.append(input_mask)
+print(writer.label2id())
+ner_evaluation(true_label, predicts, masks)
     # print("Sentence", writer.convert_id_to_vocab(tf.reshape(X,[-1]).numpy()))
     #
     # print("Label", writer.convert_id_to_label(tf.reshape(predict,[-1]).numpy()))
diff --git a/tests/NER/NER_ZH/bert_ner_test.py b/tests/NER/NER_ZH/bert_ner_test.py
@@ -5,6 +5,7 @@
 from nlpgnn.datas.dataloader import TFWriter, TFLoader
 from nlpgnn.metrics import Metric
 from nlpgnn.models import bert
+from sklearn.metrics import classification_report
 
 # 载入参数
 load_check = LoadCheckpoint(language='zh')
@@ -15,6 +16,18 @@
 param.maxlen = 100
 param.label_size = 46
 
+def ner_evaluation(true_label: list, predicts: list, masks: list):  # Print a classification report over the positions whose mask equals 1.
+    all_predict = []  # predicted labels gathered across every item
+    all_true = []  # reference labels gathered across every item
+    true_label = [tf.reshape(item, [-1]).numpy() for item in true_label]  # flatten each item to a 1-D NumPy array
+    predicts = [tf.reshape(item, [-1]).numpy() for item in predicts]  # same flattening, so positions line up with the labels
+    masks = [tf.reshape(item, [-1]).numpy() for item in masks]  # same flattening for the masks
+    for i, j, m in zip(true_label, predicts, masks):
+        index = np.argwhere(m == 1)  # keep only positions where the mask is 1 (presumably non-padding tokens; confirm against the data loader)
+        all_true.extend(i[index].reshape(-1))
+        all_predict.extend(j[index].reshape(-1))
+    report = classification_report(all_true, all_predict, digits=4)  # NOTE: a `labels` parameter can also be supplied here
+    print(report)
 
 # 构建模型
 class BERT_NER(tf.keras.Model):
@@ -65,22 +78,18 @@ def predict(self, inputs, is_training=False):
 # For test model
 # print(dir(checkpoint))
 Batch = 0
-f1s = []
-precisions = []
-recalls = []
-accuracys = []
+predicts = []
+true_label = []
+masks = []
 for X, token_type_id, input_mask, Y in ner_load.load_valid():
     predict = model.predict([X, token_type_id, input_mask])  # [batch_size, max_length,label_size]
-    # predict = tf.argmax(output, -1)
-    f1s.append(f1score(Y, predict))
-    precisions.append(precsionscore(Y, predict))
-    recalls.append(recallscore(Y, predict))
-    accuracys.append(accuarcyscore(Y, predict))
-
-print("f1:{}\tprecision:{}\trecall:{}\taccuracy:{}\n".format(np.mean(f1s),
-                                                             np.mean(precisions),
-                                                             np.mean(recalls),
-                                                             np.mean(accuracys)))
+    predict = tf.argmax(predict, -1)
+    predicts.append(predict)
+    true_label.append(Y)
+    masks.append(input_mask)
+print(writer.label2id())
+ner_evaluation(true_label, predicts, masks)
+
 # print("Sentence", writer.convert_id_to_vocab(tf.reshape(X,[-1]).numpy()))
 #
 # print("Label", writer.convert_id_to_label(tf.reshape(predict,[-1]).numpy()))