diff --git a/lab6_named_entity_recognition/README.md b/lab6_named_entity_recognition/README.md index 6f1ea90..55bbcd1 100644 --- a/lab6_named_entity_recognition/README.md +++ b/lab6_named_entity_recognition/README.md @@ -12,23 +12,23 @@ poetry install ## Native -Naive version implemented in [naive.py](naive.py) uses a regex to extract proper names. +Naive version implemented in [named_entity_recognition/naive.py](named_entity_recognition/naive.py) uses a regex to extract proper names. ```bash -poetry run python naive.py +poetry run python named_entity_recognition/naive.py ``` -Results are saved to [output/naive.txt](output/naive.txt). +Results are saved to [named_entity_recognition/output/naive.txt](named_entity_recognition/output/naive.txt). ## NLTK -Version implemented in [using_nltk.py](using_nltk.py) uses NLTK's word tokenizer, part of speech tagger, and finally named entities chunker. +Version implemented in [named_entity_recognition/using_nltk.py](named_entity_recognition/using_nltk.py) uses NLTK's word tokenizer, part of speech tagger, and finally named entities chunker. ```bash -poetry run python using_nltk.py +poetry run python named_entity_recognition/using_nltk.py ``` -Results are saved to [output/naive.txt](output/nltk.txt). +Results are saved to [named_entity_recognition/output/nltk.txt](named_entity_recognition/output/nltk.txt). ### Initial setup @@ -50,20 +50,20 @@ $ poetry run python ## Tag with Wikidata (SPARQL) -Named entites found with NLTK are annotated with what they are instances of accorcing to Wikidata, see implementation in [tag_with_wikidata_sparql.py](tag_with_wikidata_sparql.py). +Named entities found with NLTK are annotated with what they are instances of according to Wikidata, see implementation in [named_entity_recognition/tag_with_wikidata_sparql.py](named_entity_recognition/tag_with_wikidata_sparql.py). 
```bash -poetry run python tag_with_wikidata_sparql.py +poetry run python named_entity_recognition/tag_with_wikidata_sparql.py ``` -Results are saved to HTML table in [output/tagged_with_wikidata.html](output/tagged_with_wikidata.html). +Results are saved to an HTML table in [named_entity_recognition/output/tagged_with_wikidata.html](named_entity_recognition/output/tagged_with_wikidata.html). ## Tag with Google Knowledge Graph Search API -Named entites found with NLTK are annotated with what type they are accorcing to Knowledge Graph, see implementation in [tag_with_google_knowledge_graph.py](tag_with_google_knowledge_graph.py). You need to get API key from [here](https://console.cloud.google.com/flows/enableapi?apiid=kgsearch.googleapis.com&credential=client_key). +Named entities found with NLTK are annotated with what type they are according to Knowledge Graph, see implementation in [named_entity_recognition/tag_with_google_knowledge_graph.py](named_entity_recognition/tag_with_google_knowledge_graph.py). You need to get an API key from [here](https://console.cloud.google.com/flows/enableapi?apiid=kgsearch.googleapis.com&credential=client_key). ```bash -API_KEY={put your API key here} poetry run python tag_with_google_knowledge_graph.py +API_KEY={put your API key here} poetry run python named_entity_recognition/tag_with_google_knowledge_graph.py ``` -Results are saved to HTML table in [output/tagged_with_google_graph.html](tagged_with_google_graph.html). +Results are saved to an HTML table in [named_entity_recognition/output/tagged_with_google_graph.html](named_entity_recognition/output/tagged_with_google_graph.html). 
diff --git a/lab6_named_entity_recognition/named_entity_recognition/tag_with_google_knowledge_graph.py b/lab6_named_entity_recognition/named_entity_recognition/tag_with_google_knowledge_graph.py index 341080d..cdc134c 100644 --- a/lab6_named_entity_recognition/named_entity_recognition/tag_with_google_knowledge_graph.py +++ b/lab6_named_entity_recognition/named_entity_recognition/tag_with_google_knowledge_graph.py @@ -1,13 +1,12 @@ -from named_entity_recognition.utils import (join_with_newlines, load_list, - save_text) import json -import time -import urllib +import os from typing import List, Tuple from urllib.parse import quote import urllib3 -import os + +from named_entity_recognition.utils import (join_with_newlines, load_list, + save_text) LIMIT = 0 @@ -48,7 +47,8 @@ def fetch_classes_from_google(api_key: str, name: str) -> str: jsonld = json.loads(response.data) classes = set() if 'itemListElement' not in jsonld: - raise RuntimeError('Incorrect response: ' + json.dumps(jsonld, indent=2)) + raise RuntimeError('Incorrect response: ' + + json.dumps(jsonld, indent=2)) for item in jsonld['itemListElement']: try: for t in item['result']['@type']: diff --git a/lab6_named_entity_recognition/named_entity_recognition/utils.py b/lab6_named_entity_recognition/named_entity_recognition/utils.py index a0a3570..00095ba 100644 --- a/lab6_named_entity_recognition/named_entity_recognition/utils.py +++ b/lab6_named_entity_recognition/named_entity_recognition/utils.py @@ -1,24 +1,27 @@ from typing import Iterable, List +import os + +CURRENT_DIR = os.path.dirname(os.path.realpath(__file__)) def load_text(file_path: str) -> str: - with open(file_path) as f: + with open(os.path.join(CURRENT_DIR, file_path)) as f: return f.read() def save_text(file_path: str, text: str) -> None: - with open(file_path, 'w') as f: + with open(os.path.join(CURRENT_DIR, file_path), 'w') as f: return f.write(text) def save_list(file_name: str, list: List) -> None: - with open(f'output/{file_name}', 'w') as 
f: + with open(os.path.join(CURRENT_DIR, f'output/{file_name}'), 'w') as f: for el in list: f.write(f'{el}\n') def load_list(file_path: str) -> List[str]: - with open(file_path) as f: + with open(os.path.join(CURRENT_DIR, file_path)) as f: return [x.strip() for x in f.readlines()] def join_with_newlines(x: Iterable[str]) -> str: