Skip to content

Commit 166a149

Browse files
author
Cognitive Computation Group
committed
upload
1 parent d3624a6 commit 166a149

32 files changed

+6025
-2
lines changed

Diff for: Analyze_parsing_results.py

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import ujson as json
2+
3+
4+
def count_events_and_sentences(documents):
    """Count extracted events and sentences across all documents.

    Each document is expected to carry an ``'event_extraction_results'``
    list holding one entry per sentence; the length of each entry is the
    number of events extracted from that sentence.

    Args:
        documents: iterable of document dicts loaded from the extraction
            output file.

    Returns:
        A ``(event_count, sentence_count)`` tuple.
    """
    event_count = 0
    sentence_count = 0
    for document in documents:
        predictions_by_sentence = document['event_extraction_results']
        # One prediction list per sentence, so its length is the number of
        # sentences in this document (not len(documents), which would be
        # the number of documents).
        sentence_count += len(predictions_by_sentence)
        event_count += sum(len(events) for events in predictions_by_sentence)
    return event_count, sentence_count


if __name__ == '__main__':
    with open('data/quizlet3/data_after_event_extraction.json', 'r') as f:
        tmp_results = json.load(f)

    event_counter, sentence_counter = count_events_and_sentences(tmp_results)

    print('Number of extracted events:', event_counter)
    print('Number of sentences:', sentence_counter)

Diff for: Better_test.py

+401
Large diffs are not rendered by default.

Diff for: Evaluation.py

+383
Large diffs are not rendered by default.

Diff for: Evaluation_by_sentence.py

+309
Large diffs are not rendered by default.

Diff for: Extraction_quizlet3.py

+65
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
from util import *
2+
from document_reader import *
3+
import os
4+
5+
# Script: read every LTF XML document of KAIROS Quizlet 3, run the event
# extractor on each sentence, and dump the annotated documents to JSON.
# NOTE(review): argparse, torch, tqdm, json, ltf_reader and
# CogcompKairosEventExtractor are presumably provided by the star imports
# at the top of the file -- confirm.

folder_name = '/shared/kairos/Data/LDC2020E30_KAIROS_Quizlet_3_Source_Data_and_Graph_G/data/source/ltf/ltf/'
documents = list()
# Sort the listing so the order of documents in the output is reproducible;
# os.listdir returns entries in arbitrary order.
for tmp_file_name in sorted(os.listdir(folder_name)):
    # Match on the suffix: a substring test would also accept any file
    # whose name merely contains "xml".
    if tmp_file_name.endswith('.xml'):
        extracted_data = ltf_reader(folder_name, tmp_file_name)
        documents.append(extracted_data)

        # sentences = list()
        # for tmp_s in extracted_data['sentences']:
        #     sentences.append(tmp_s['content'])

        # new_file_name = tmp_file_name.replace('.xml', '.txt')
        # with open('data/quizlet3/' + new_file_name, 'w', encoding='utf-8') as f:
        #     for s in sentences:
        #         f.write(s)
        #         f.write('\n')

# print('done')


parser = argparse.ArgumentParser()
parser.add_argument("--gpu", default='1', type=str, required=False,
                    help="choose which gpu to use")
parser.add_argument("--representation_source", default='nyt', type=str, required=False,
                    help="source of the representations")
parser.add_argument("--model", default='bert-large', type=str, required=False,
                    help="which model to use")
parser.add_argument("--pooling_method", default='final', type=str, required=False,
                    help="which pooling method to use")
parser.add_argument("--weight", default=100, type=float, required=False,
                    help="weight assigned to triggers")
parser.add_argument("--argument_matching", default='exact', type=str, required=False,
                    help="how arguments are matched")
parser.add_argument("--eval_model", default='joint', type=str, required=False,
                    help="which evaluation model to use")

args = parser.parse_args()
# Restrict the visible GPUs before the device is selected below.
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('current device:', device)


test_extractor = CogcompKairosEventExtractor(device)

results = list()
for tmp_document in documents:
    print('We are working on document:', tmp_document["doc_id"])
    # One extraction result per sentence, kept in sentence order.
    extracted_results = list()
    for tmp_s in tqdm(tmp_document['sentences']):
        extracted_results.append(test_extractor.extract(tmp_s['content']))
    tmp_document['event_extraction_results'] = extracted_results
    results.append(tmp_document)

with open('data/quizlet3/data_after_event_extraction.json', 'w') as f:
    json.dump(results, f)


print('end')

Diff for: LICENSE

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2020 Hongming ZHANG
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

Diff for: README.md

+19-2
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,19 @@
1-
# KAIROS-Event-Extraction
2-
KAIROS-Event-Extraction
1+
# Kairos-cogcomp
2+
3+
### Environment
4+
1. Set up the environment with the environment.yml file (`conda env create -f environment.yml`)
5+
2. Downgrade the xlrd package to 1.2.0 with `pip uninstall xlrd` and then `pip install xlrd==1.2.0`
6+
7+
### Big files
8+
9+
Download files from server with `wget https://www.seas.upenn.edu/~hzhangal/etype_to_distinct_embeddings.json` and `wget https://www.seas.upenn.edu/~hzhangal/rtype_to_distinct_embeddings.json` and put them under the `data/` folder.
10+
11+
12+
13+
### Run
14+
15+
Run the service with `python backend.py`
16+
17+
### Examples
18+
1. Input example: curl -d '{"sentence": "The president of the USA holds a lot of power.", "tokens": ["The", "president", "of", "the", "USA", "holds", "a", "lot", "of", "power", "."], "identified_trigger_positions": [[5, 6], [1, 2], [7, 8], [9, 10]], "detected_mentions": {"(0, 1)": "NE", "(3, 5)": "NE"}, "trigger_to_arguments": {"(5, 6)": [[0, 1], [3, 5]], "(1, 2)": [[3, 5]], "(7, 8)": [], "(9, 10)": []}}' -H "Content-Type: application/json" -X POST http://localhost:20203/annotate
19+
2. Output example: {"corpusId": "", "id": "", "text": "The president of the USA holds a lot of power.", "tokens": ["The", "president", "of", "the", "USA", "holds", "a", "lot", "of", "power."], "sentences": {"generator": "UnsupervisedEventExtraction", "score": 1.0, "sentenceEndPositions": [46]}, "views": [{"viewName": "TOKENS", "viewData": {"viewType": "edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView", "viewName": "TOKENS", "generator": "Cogcomp-SRL", "score": 1.0, "constituents": [{"label": "The", "score": 1.0, "start": 0, "end": 1}, {"label": "president", "score": 1.0, "start": 1, "end": 2}, {"label": "of", "score": 1.0, "start": 2, "end": 3}, {"label": "the", "score": 1.0, "start": 3, "end": 4}, {"label": "USA", "score": 1.0, "start": 4, "end": 5}, {"label": "holds", "score": 1.0, "start": 5, "end": 6}, {"label": "a", "score": 1.0, "start": 6, "end": 7}, {"label": "lot", "score": 1.0, "start": 7, "end": 8}, {"label": "of", "score": 1.0, "start": 8, "end": 9}, {"label": "power.", "score": 1.0, "start": 9, "end": 10}]}}, {"viewName": "Event_extraction", "viewData": [{"viewType": "edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView", "viewName": "event_extraction", "generator": "cogcomp_kairos_event_ie_v1.0", "score": 1.0, "constituents": [], "relations": []}]}]}

Diff for: __pycache__/util.cpython-36.pyc

22.8 KB
Binary file not shown.

0 commit comments

Comments
 (0)