Michael-Dyq
diff --git a/Diff for: ‎Analyze_parsing_results.py
+14 b/Diff for: ‎Analyze_parsing_results.py
+14
diff --git a/Diff for: ‎Better_test.py
+401 b/Diff for: ‎Better_test.py
+401
diff --git a/Diff for: ‎Evaluation.py
+383 b/Diff for: ‎Evaluation.py
+383
diff --git a/Diff for: ‎Evaluation_by_sentence.py
+309 b/Diff for: ‎Evaluation_by_sentence.py
+309
diff --git a/Diff for: ‎Extraction_quizlet3.py
+65 b/Diff for: ‎Extraction_quizlet3.py
+65
diff --git a/Diff for: ‎LICENSE
+21 b/Diff for: ‎LICENSE
+21
diff --git a/Diff for: ‎README.md
+19-2 b/Diff for: ‎README.md
+19-2
diff --git a/Diff for: ‎__pycache__/util.cpython-36.pyc
22.8 KB b/Diff for: ‎__pycache__/util.cpython-36.pyc
22.8 KB
@@ -0,0 +1,14 @@
+import ujson as json
+
+
+# Summarize the event-extraction output file: report the total number of
+# extracted events and the total number of sentences across all documents.
+with open('data/quizlet3/data_after_event_extraction.json', 'r') as f:
+    tmp_results = json.load(f)
+
+
+event_counter = 0
+sentence_counter = 0
+for tmp_document in tmp_results:
+    # 'event_extraction_results' is iterated sentence by sentence below, so
+    # its length is this document's sentence count.
+    predictions_by_sentence = tmp_document['event_extraction_results']
+    sentence_counter += len(predictions_by_sentence)
+    for tmp_prediction_by_sentence in predictions_by_sentence:
+        event_counter += len(tmp_prediction_by_sentence)
+
+print('Number of extracted events:', event_counter)
+# The top-level list holds one item per document, so len(tmp_results) would
+# report the document count rather than the sentence count.
+print('Number of sentences:', sentence_counter)
@@ -0,0 +1,65 @@
+from util import *
+from document_reader import *
+import os
+
+# Read every LTF XML document in the Quizlet 3 source folder, run the event
+# extractor on each sentence, and dump the enriched documents to JSON.
+folder_name = '/shared/kairos/Data/LDC2020E30_KAIROS_Quizlet_3_Source_Data_and_Graph_G/data/source/ltf/ltf/'
+documents = list()
+# os.listdir returns entries in arbitrary order; sort so that documents are
+# processed (and written out) in a reproducible order.
+for tmp_file_name in sorted(os.listdir(folder_name)):
+    # Check the extension instead of looking for 'xml' anywhere in the name,
+    # which would also pick up unrelated files.
+    if tmp_file_name.endswith('.xml'):
+        documents.append(ltf_reader(folder_name, tmp_file_name))
+
+
+# Only --gpu is read by this script; the remaining options are parsed but not
+# used below.
+parser = argparse.ArgumentParser()
+parser.add_argument("--gpu", default='1', type=str, required=False,
+                    help="choose which gpu to use")
+parser.add_argument("--representation_source", default='nyt', type=str, required=False,
+                    help="choose which representation source to use")
+parser.add_argument("--model", default='bert-large', type=str, required=False,
+                    help="choose which model to use")
+parser.add_argument("--pooling_method", default='final', type=str, required=False,
+                    help="choose which pooling method to use")
+parser.add_argument("--weight", default=100, type=float, required=False,
+                    help="weight assigned to triggers")
+parser.add_argument("--argument_matching", default='exact', type=str, required=False,
+                    help="choose which argument matching method to use")
+parser.add_argument("--eval_model", default='joint', type=str, required=False,
+                    help="choose which eval model to use")
+
+args = parser.parse_args()
+# Restrict the visible CUDA devices before the device is created below.
+os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print('current device:', device)
+
+
+test_extractor = CogcompKairosEventExtractor(device)
+
+results = list()
+for tmp_document in documents:
+    print('We are working on document:', tmp_document["doc_id"])
+    # One extraction result per sentence, kept in sentence order.
+    extracted_results = [test_extractor.extract(tmp_s['content'])
+                         for tmp_s in tqdm(tmp_document['sentences'])]
+    tmp_document['event_extraction_results'] = extracted_results
+    results.append(tmp_document)
+
+with open('data/quizlet3/data_after_event_extraction.json', 'w') as f:
+    json.dump(results, f)
+
+
+print('end')
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2020 Hongming ZHANG
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -1,2 +1,19 @@
-# KAIROS-Event-Extraction
-KAIROS-Event-Extraction
+# Kairos-cogcomp
+
+### Environment
+1. Set up the environment with the environment.yml file (`conda env create -f environment.yml`)
+2. Downgrade the xlrd package to 1.2.0 with `pip uninstall xlrd` and then `pip install xlrd==1.2.0`
+
+### Big files
+
+Download the embedding files from the server with `wget https://www.seas.upenn.edu/~hzhangal/etype_to_distinct_embeddings.json` and `wget https://www.seas.upenn.edu/~hzhangal/rtype_to_distinct_embeddings.json`, then put them under the `data/` folder.
+
+
+
+### Run
+
+Run the service with `python backend.py`
+
+### Examples
+1. Input example: curl -d '{"sentence": "The president of the USA holds a lot of power.", "tokens": ["The", "president", "of", "the", "USA", "holds", "a", "lot", "of", "power", "."], "identified_trigger_positions": [[5, 6], [1, 2], [7, 8], [9, 10]], "detected_mentions": {"(0, 1)": "NE", "(3, 5)": "NE"}, "trigger_to_arguments": {"(5, 6)": [[0, 1], [3, 5]], "(1, 2)": [[3, 5]], "(7, 8)": [], "(9, 10)": []}}' -H "Content-Type: application/json" -X POST http://localhost:20203/annotate
+2. Output example: {"corpusId": "", "id": "", "text": "The president of the USA holds a lot of power.", "tokens": ["The", "president", "of", "the", "USA", "holds", "a", "lot", "of", "power."], "sentences": {"generator": "UnsupervisedEventExtraction", "score": 1.0, "sentenceEndPositions": [46]}, "views": [{"viewName": "TOKENS", "viewData": {"viewType": "edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView", "viewName": "TOKENS", "generator": "Cogcomp-SRL", "score": 1.0, "constituents": [{"label": "The", "score": 1.0, "start": 0, "end": 1}, {"label": "president", "score": 1.0, "start": 1, "end": 2}, {"label": "of", "score": 1.0, "start": 2, "end": 3}, {"label": "the", "score": 1.0, "start": 3, "end": 4}, {"label": "USA", "score": 1.0, "start": 4, "end": 5}, {"label": "holds", "score": 1.0, "start": 5, "end": 6}, {"label": "a", "score": 1.0, "start": 6, "end": 7}, {"label": "lot", "score": 1.0, "start": 7, "end": 8}, {"label": "of", "score": 1.0, "start": 8, "end": 9}, {"label": "power.", "score": 1.0, "start": 9, "end": 10}]}}, {"viewName": "Event_extraction", "viewData": [{"viewType": "edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView", "viewName": "event_extraction", "generator": "cogcomp_kairos_event_ie_v1.0", "score": 1.0, "constituents": [], "relations": []}]}]}