|
| 1 | +import os |
| 2 | +import traceback |
| 3 | +from pybliometrics.scopus import ScopusSearch, AbstractRetrieval |
| 4 | +import json |
| 5 | +from tqdm import tqdm |
| 6 | + |
| 7 | +# NOTE: config file for pybliometrics is stored in $HOME/.config/pybliometrics.cfg |
| 8 | + |
if __name__ == "__main__":
    # Harvest Scopus records (2013-2023) on data mining / machine learning in
    # materials science, one JSON file per document (named by Scopus EID),
    # each augmented with its reference list under the "ref_docs" key.
    current_path = os.getcwd()  # loop-invariant; hoisted out of the year loop
    for year in range(2013, 2024):
        # Make the folder to store the data for the year.
        # exist_ok=True avoids the check-then-create race of os.path.exists.
        folder_path = os.path.join(current_path, str(year))
        os.makedirs(folder_path, exist_ok=True)

        # Run the search. STANDARD view returns the lightweight result tuples.
        x = ScopusSearch(
            f'ABS ( "data mining" ) OR ABS ( "machine learning" ) OR TITLE ( "data mining" ) OR TITLE ( "machine learning" ) AND TITLE ( "material" ) OR ABS ( "material" ) OR SRCTITLE ( "material" ) AND SUBJAREA ( mate ) AND DOCTYPE ( "AR" ) AND SRCTYPE( j ) AND PUBYEAR = {year} AND NOT SUBJAREA (medi ) AND NOT SUBJAREA ( immu ) AND NOT SUBJAREA ( BIOC ) AND NOT SUBJAREA ( busi )',
            view="STANDARD")
        # pybliometrics returns None (not []) when a search has no hits;
        # normalize so len() and iteration below are always safe.
        results = x.results or []
        print(len(results))

        # Store the results and add the ref_docs key to store each reference.
        for doc in tqdm(results):
            try:
                # Store each result in a file labeled by its Scopus EID.
                doc_dict = doc._asdict()
                eid = doc_dict["eid"]
                file_path = os.path.join(folder_path, f"{eid}.json")
                if not os.path.exists(file_path):
                    # Look up the references / citations for that document.
                    document = AbstractRetrieval(eid, view="REF")
                    refs = []
                    # document.references can be None when Scopus has no
                    # reference data for the record — guard before iterating.
                    for ref in document.references or []:
                        ref_doc = {"doi": ref.doi, "title": ref.title,
                                   "id": ref.id,
                                   "sourcetitle": ref.sourcetitle}
                        refs.append(ref_doc)
                    doc_dict["ref_docs"] = refs
                    # Dump the dictionary to the JSON file.
                    with open(file_path, "w", encoding="utf-8") as json_file:
                        json.dump(doc_dict, json_file)
                else:
                    print("SKIP (File already exists)")

            # We're not going to try too hard to fix any of the rare errors:
            # log, print the traceback, and move on to the next document.
            except Exception as e:
                print(f"An error occurred: {e}")
                traceback.print_exc()
0 commit comments