From 520bd3fbc266322f86688c0032f050a24b0073b6 Mon Sep 17 00:00:00 2001
From: OKUMURA
Date: Wed, 9 Aug 2023 18:12:41 +0200
Subject: [PATCH] moved some pickles to exfor_dictionary

---
 MANIFEST.in                               |   7 +-
 pyproject.toml                            |   2 +-
 src/exforparser/config.py                 |  29 +++--
 src/exforparser/parser/exfor_reaction.py  |   3 +-
 src/exforparser/parser/list_x4files.py    |   3 -
 src/exforparser/sql/creation.py           | 135 ----------------------
 src/exforparser/sql/models.py             |  13 ++-
 src/exforparser/sql/queries.py            |  68 +++++++++++
 src/exforparser/tabulated.py              |  66 ++++++++---
 src/exforparser/tabulated/data_process.py |   4 +-
 10 files changed, 155 insertions(+), 175 deletions(-)
 delete mode 100644 src/exforparser/sql/creation.py
 create mode 100644 src/exforparser/sql/queries.py

diff --git a/MANIFEST.in b/MANIFEST.in
index a014cc9..d734c5d 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,3 +1,4 @@
-include src/tabulated/MTall.dat
-include src/tabulated/*.json
-include src/submodules/utilities/*.txt
\ No newline at end of file
+include src/exforparser/tabulated/MTall.dat
+include src/exforparser/tabulated/*.json
+include src/exforparser/submodules/utilities/*.txt
+include src/exforparser/pickles/*.pickle
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 70baea7..30f8f6e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,11 +36,11 @@ include-package-data = true
 
 [tool.setuptools.package-data]
 "exforparser.tabulated" = ["*.dat", "*.json"]
+"exforparser.pickles" = ["*.pickle"]
 "submodules.utilities" = ["*.txt"]
 
 
-
 [project.urls]
 Homepage = "https://github.com/shinokumura/exforparser"
 
diff --git a/src/exforparser/config.py b/src/exforparser/config.py
index dabe289..8ba4449 100644
--- a/src/exforparser/config.py
+++ b/src/exforparser/config.py
@@ -10,28 +10,41 @@
 #
 ####################################################################
 import os
-import sqlalchemy as db
-from sqlalchemy.orm import sessionmaker
+import site
+
+from sqlalchemy import create_engine
+from sqlalchemy.orm import scoped_session, sessionmaker
 
 
 DEVENV = True
 
 if DEVENV:
     DATA_DIR = "/Users/okumuras/Documents/nucleardata/EXFOR/"
+    OUT_PATH = "/Users/okumuras/Desktop/"
+    EXFOR_PARSER = "./"
 else:
     DATA_DIR = "/srv/data/dataexplorer2/"
+    OUT_PATH = "/srv/data/dataexplorer2/out/"
+
+    from importlib.resources import files
+    EXFOR_PARSER = files("exforparser")
 
-EXFOR_DB = os.path.join(DATA_DIR, "exfor_tmp.sqlite")
 EXFOR_MASTER_REPO_PATH = os.path.join(DATA_DIR, "exfor_master")
+EXFOR_DB = os.path.join(DATA_DIR, "exfor_tmp.sqlite")
+# print(EXFOR_DB)
+
 
-""" Pickle path of list of EXFOR master files made by parser.list_x4files.py"""
-ENTRY_INDEX_PICKLE = "pickles/entry.pickle"
+""" Pickle path of list of EXFOR master files made by parser.list_x4files.py """
+ENTRY_INDEX_PICKLE = os.path.join( EXFOR_PARSER, "pickles/entry.pickle" )
 
-OUT_PATH = DATA_DIR + "../../../Desktop/"
+""" Pickle path of the institute dictionary shipped with the exfor_dictionary package """
+SITE_DIR = site.getsitepackages()[0]
+INSTITUTE_PICKLE = os.path.join( SITE_DIR, "exfor_dictionary", "pickles/institute.pickle" )
 
 """ SQL database """
-engine = db.create_engine("sqlite:///" + EXFOR_DB)
-session = sessionmaker(autocommit=False, autoflush=True, bind=engine)
+engine = create_engine("sqlite:///" + EXFOR_DB)
+Session = scoped_session(sessionmaker(autocommit=False, autoflush=False, bind=engine))
+session = Session()
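
A minimal sketch (not part of the patch) of how the two pickle locations are resolved after this change. It assumes exforparser and exfor_dictionary are installed as regular, non-zipped packages and that the first entry of site.getsitepackages() is where exfor_dictionary lives, which is what the new config.py assumes:

import os
import pickle
import site
from importlib.resources import files

# ENTRY_INDEX_PICKLE: still shipped inside the exforparser package itself.
entry_pickle = os.path.join(files("exforparser"), "pickles/entry.pickle")

# INSTITUTE_PICKLE: now shipped by the separate exfor_dictionary package.
site_dir = site.getsitepackages()[0]
institute_pickle = os.path.join(site_dir, "exfor_dictionary", "pickles/institute.pickle")

with open(institute_pickle, "rb") as f:
    institutes = pickle.load(f)  # assumed to hold the EXFOR institute code dictionary
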
diff --git a/src/exforparser/parser/exfor_reaction.py b/src/exforparser/parser/exfor_reaction.py
index 4db79ea..02dca0f 100644
--- a/src/exforparser/parser/exfor_reaction.py
+++ b/src/exforparser/parser/exfor_reaction.py
@@ -23,6 +23,7 @@ def split_sf(sf49):
     sf7 = len(sf49) > 3 and sf49[3] or None
     sf8 = len(sf49) > 4 and sf49[4] or None
     sf9 = len(sf49) > 5 and sf49[5] or None
+
     return {
         "sf4": sf4,
         "sf5": sf5,
@@ -99,7 +100,7 @@ def parse_reaction(reaction_field) -> dict:
     except:
         reaction_info = {
             "x4_code": x4_code,
-            "children": {"code": b[0], "type": None},
+            "children": [ {"code": b[0], "type": None} ],
             "type": "?",
             "free_text": free_text,
         }
diff --git a/src/exforparser/parser/list_x4files.py b/src/exforparser/parser/list_x4files.py
index eae46b8..cb1ff99 100644
--- a/src/exforparser/parser/list_x4files.py
+++ b/src/exforparser/parser/list_x4files.py
@@ -175,8 +175,6 @@ def compare_hash(df):
         "14605",  # Istitute two rows with free text
         # "G0509",  # facility has pointer
         "O0191",
-        "12898",
-        "11945",
         "22436",
         "D0047",  # ((92-U-CMP(A,N),,PY,,TT)=(6-C-CMP(A,N),,PY,,TT))
         "12898",  # pointer ratio normal, from Amanda, ((23-V-51(N,P)22-TI-51,,SIG)/(92-U-238(N,F),,SIG))
@@ -203,7 +201,6 @@ def compare_hash(df):
         "21107",
         "13500",
         "23313",
-        "10377",
         "20010",  # REFERENCE ((R,KFK-1000,1968)=(R,EUR-3963E,1968)= + \n (R,EANDC(E)-111,1968))
         "21902",
         "30328",
diff --git a/src/exforparser/sql/creation.py b/src/exforparser/sql/creation.py
deleted file mode 100644
index 13efeca..0000000
--- a/src/exforparser/sql/creation.py
+++ /dev/null
@@ -1,135 +0,0 @@
-################################################################################################
-#
-#
-## ------------------------ SQL database creation ------------------------ ##
-#
-#
-################################################################################################
-import sqlalchemy as db
-import pandas as pd
-
-from config import engine
-
-connection = engine.connect()
-metadata = db.MetaData()
-
-
-def drop_tables():
-    for tbl in reversed(metadata.sorted_tables):
-        engine.execute(tbl.delete())
-
-
-exfor_bib = db.Table(
-    "exfor_bib",
-    metadata,
-    db.Column("entry", db.String(5), primary_key=True),
-    db.Column("title", db.String(255), index=True),
-    db.Column("first_author", db.String(255), index=True),
-    db.Column("authors", db.String(255)),
-    db.Column("first_author_institute", db.String(255)),
-    db.Column("main_facility_institute", db.String(255)),
-    db.Column("main_facility_type", db.String(255)),
-    db.Column("main_reference", db.String(255), index=True),
-    db.Column("year", db.Integer(), index=True),
-)
-
-
-exfor_reactions = db.Table(
-    "exfor_reactions",
-    metadata,
-    db.Column("entry_id", db.String(255), primary_key=True, index=True),
-    db.Column("entry", db.String(5)),
-    db.Column("target", db.String(255), index=True),
-    db.Column("projectile", db.String(255)),
-    db.Column("process", db.String(255), index=True),
-    db.Column("sf4", db.String(255), index=True),
-    db.Column("sf5", db.String(255)),
-    db.Column("sf6", db.String(255), index=True),
-    db.Column("sf7", db.String(255)),
-    db.Column("sf8", db.String(255)),
-    db.Column("sf9", db.String(255)),
-    db.Column("x4_code", db.String(255)),
-)
-
-
-exfor_index = db.Table(
-    "exfor_index",
-    metadata,
-    db.Column("id", db.Integer(), primary_key=True),
-    db.Column("entry_id", db.String(20), index=True),
-    db.Column("entry", db.String(5)),
-    db.Column("target", db.String(20), index=True),
-    db.Column("projectile", db.String(20)),
-    db.Column("process", db.String(40), index=True),
-    db.Column("sf4", db.String(20), index=True),
-    db.Column("residual", db.String(20), index=True),
-    db.Column("level_num", db.Integer(), index=True),
-    db.Column("e_out", db.Float(), index=True),
-    db.Column("e_inc_min", db.Float(), index=True),
-    db.Column("e_inc_max", db.Float(), index=True),
-    db.Column("points", db.Integer()),
-    db.Column("arbitrary_data", db.Boolean(), index=True),
-    db.Column("sf5", db.String(10)),
-    db.Column("sf6", db.String(10), index=True),
-    db.Column("sf7", db.String(10)),
-    db.Column("sf8", db.String(10)),
-    db.Column("sf9", db.String(10)),
-    db.Column("x4_code", db.String(255)),
-    db.Column("mf", db.Integer(), index=True),
-    db.Column("mt", db.Integer(), index=True),
-)
-
-
-exfor_data = db.Table(
-    "exfor_data",
-    metadata,
-    db.Column("id", db.Integer(), primary_key=True),
-    db.Column("entry_id", db.String(20), index=True),
-    db.Column("en_inc", db.Float()),
-    db.Column("den_inc", db.Float()),
-    db.Column("charge", db.Float()),
-    db.Column("mass", db.String(20)),  # must be string for CMP cases
-    db.Column("isomer", db.String(20)),
-    db.Column("residual", db.String(20), index=True),
-    db.Column("level_num", db.Integer(), index=True),
-    db.Column("data", db.Float()),
-    db.Column("ddata", db.Float()),
-    db.Column("arbitrary_data", db.Boolean(), index=True),
-    db.Column("arbitrary_ddata", db.Boolean()),
-    db.Column("e_out", db.Float()),
-    db.Column("de_out", db.Float()),
-    db.Column("angle", db.Float()),
-    db.Column("dangle", db.Float()),
-    db.Column("mf", db.Integer(), index=True),
-    db.Column("mt", db.Integer(), index=True),
-)
-
-
-metadata.create_all(engine)  # Creates the table
-
-
-def insert_df_to_data(df):
-    df2 = df.astype(object).where(pd.notnull(df), None)
-    for record in df2.to_dict(orient="records"):
-        query = db.insert(exfor_data).values(record)
-        ResultProxy = connection.execute(query)
-
-
-def insert_bib(dictlist):
-    connection.execute(exfor_bib.insert(), dictlist)
-
-
-def insert_reaction(dictlist):
-    connection.execute(exfor_reactions.insert(), dictlist)
-
-
-def insert_reaction_index(dictlist):
-    connection.execute(exfor_index.insert(), dictlist)
-
-
-def show():
-    results = connection.execute(db.select([exfor_data])).fetchall()
-    df = pd.DataFrame(results)
-    df.columns = results[0].keys()
-    df.head(4)
diff --git a/src/exforparser/sql/models.py b/src/exforparser/sql/models.py
index f6f4d10..23afa8b 100644
--- a/src/exforparser/sql/models.py
+++ b/src/exforparser/sql/models.py
@@ -8,9 +8,10 @@
 import sqlalchemy as db
 from sqlalchemy.ext.declarative import declarative_base
 
+from config import engine
 
-Base = declarative_base()
 
+Base = declarative_base()
 
 class Exfor_Bib(Base):
     __tablename__ = "exfor_bib"
@@ -63,6 +64,7 @@ class Exfor_Indexes(Base):
     sf8 = db.Column(db.String)
     sf9 = db.Column(db.String)
     x4_code = db.Column(db.String)
+
     mf = db.Column(db.Integer)
     mt = db.Column(db.Integer)
 
@@ -85,10 +87,13 @@ class Exfor_Data(Base):
     de_out = db.Column(db.Float)
     angle = db.Column(db.Float)
     dangle = db.Column(db.Float)
+
     mf = db.Column(db.Integer)
     mt = db.Column(db.Integer)
 
+Base.metadata.create_all(bind=engine)
 
-if __name__ == "__main__":
-    from settings import engine
-    Base.metadata.create_all(bind=engine)
+if __name__ == "__main__":
+    # from config import engine
+    # Base.metadata.create_all(bind=engine)
+    pass
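
Because Base.metadata.create_all(bind=engine) now runs at module level, importing the models module is enough to create any missing tables on the configured engine. A minimal sketch (not part of the patch; it assumes the package-internal absolute imports such as "from config import engine" resolve the way the parser normally runs them):

# Importing the module creates the tables as an import-time side effect.
from sql import models  # noqa: F401

# The same call can also be made explicitly:
models.Base.metadata.create_all(bind=models.engine)
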
diff --git a/src/exforparser/sql/queries.py b/src/exforparser/sql/queries.py
new file mode 100644
index 0000000..156bc74
--- /dev/null
+++ b/src/exforparser/sql/queries.py
@@ -0,0 +1,68 @@
+
+import sqlalchemy as db
+from sqlalchemy import insert
+import pandas as pd
+
+
+# from sql.creation import exfor_bib, exfor_reactions, exfor_index, exfor_data
+from sql.models import Exfor_Bib, Exfor_Reactions, Exfor_Indexes, Exfor_Data
+from config import engine, session
+
+connection = engine.connect()
+metadata = db.MetaData()
+
+
+def insert_df_to_data(df):
+    df2 = df.astype(object).where(pd.notnull(df), None)
+    # for record in df2.to_dict(orient="records"):
+    #     query = db.insert(exfor_data).values(record)
+    #     ResultProxy = connection.execute(query)
+
+    df2.to_sql(
+        "exfor_data",
+        connection,
+        index=False,
+        if_exists="append",
+    )
+
+
+def insert_bib(dictlist):
+    # connection.execute(exfor_bib.insert(), dictlist)
+
+    data = Exfor_Bib(**dictlist)
+    session.add(data)
+    session.commit()
+
+
+
+def insert_reaction(dictlist):
+    # connection.execute(exfor_reactions.insert(), dictlist)
+    for dict in dictlist:
+        data = Exfor_Reactions(**dict)
+        session.add(data)
+    session.commit()
+
+
+
+def insert_reaction_index(dictlist):
+    # connection.execute(exfor_index.insert(), dictlist)
+    for dict in dictlist:
+        data = Exfor_Indexes(**dict)
+        session.add(data)
+    session.commit()
+
+
+def show():
+    results = connection.execute(db.select([exfor_data])).fetchall()
+    df = pd.DataFrame(results)
+    df.columns = results[0].keys()
+    df.head(4)
+
+
+
+def drop_tables():
+    for tbl in reversed(metadata.sorted_tables):
+        engine.execute(tbl.delete())
+
+
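
A short usage sketch (not part of the patch) for the new session-based helpers; the column names follow the models above, but the entry number and field values are illustrative only:

from sql.queries import insert_bib, insert_reaction

# insert_bib() takes a single dict and maps it onto the Exfor_Bib model.
insert_bib(
    {
        "entry": "12345",
        "title": "Example title",
        "first_author": "A.Author",
        "year": 1970,
    }
)

# insert_reaction() takes a list of dicts, one per reaction pointer.
insert_reaction(
    [
        {
            "entry_id": "12345-002-0",
            "entry": "12345",
            "target": "26-FE-56",
            "projectile": "N",
            "process": "N,G",
            "sf6": "SIG",
            "x4_code": "(26-FE-56(N,G)26-FE-57,,SIG)",
        }
    ]
)

Both helpers add the objects to the single session created in config.py and commit within the call, so the caller does not need a separate flush or commit step.
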
diff --git a/src/exforparser/tabulated.py b/src/exforparser/tabulated.py
index 02f7d1b..8286900 100644
--- a/src/exforparser/tabulated.py
+++ b/src/exforparser/tabulated.py
@@ -35,14 +35,12 @@
     mt_fy_sf5,
     sig_sf5,
     mt_nu_sf5,
-    get_mf,
-    get_mt,
 )
 from tabulated.data_write import *
 from tabulated.data_dir_files import *
 from tabulated.data_process import *
-from sql.creation import *
+from sql.queries import insert_bib, insert_reaction, insert_reaction_index
 
 
 # get heading list
@@ -53,8 +51,7 @@
 
 
 def bib_dict(entry_json):
-    bib_data = [
-        {
+    bib_data = {
             "entry": entry_json["entry"],
             "title": entry_json["bib_record"]["title"]
             if entry_json["bib_record"].get("title")
@@ -96,7 +93,7 @@ def bib_dict(entry_json):
             if entry_json["bib_record"].get("references")
             else None,
         }
-    ]
+
     insert_bib(bib_data)
 
     return
@@ -108,7 +105,6 @@ def reaction_dict_regist(entry_id, entry_json):
     ## Insert data table into exfor_reactions
     entnum, subent, pointer = entry_id.split("-")
     react_dict = entry_json["reactions"][subent][pointer]["children"][0]
-
     reac_data = [
         {
            "entry_id": entry_id,
@@ -126,9 +122,9 @@ def reaction_dict_regist(entry_id, entry_json):
         }
     ]
     insert_reaction(reac_data)
-    print(reac_data)
+    # print(reac_data)
 
-    return
+    return react_dict
 
 
 
@@ -184,6 +180,35 @@ def reaction_index_regist(entry_id, entry_json, react_dict, df):
         insert_reaction_index(reac_index)
         return None
 
+    elif not df.loc[df['residual'].isnull() & df['level_num'].isnull() ].empty:
+        mf, mt = get_unique_mf_mt(df)
+        reac_index = [
+            {
+                "entry_id": entry_id,
+                "entry": entnum,
+                "target": react_dict["target"],
+                "projectile": react_dict["process"].split(",")[0],
+                "process": react_dict["process"],
+                "sf4": react_dict["sf4"],
+                "residual": None,
+                "level_num": None,
+                "e_out": None,
+                "e_inc_min": df["en_inc"].min(),
+                "e_inc_max": df["en_inc"].max(),
+                "points": len(df.index),
+                "arbitrary_data": df["arbitrary_data"].unique()[0],
+                "sf5": react_dict["sf5"],
+                "sf6": react_dict["sf6"],
+                "sf7": react_dict["sf7"],
+                "sf8": react_dict["sf8"],
+                "sf9": react_dict["sf9"],
+                "x4_code": entry_json["reactions"][subent][pointer]["x4_code"],
+                "mf": int(mf) if mf else None,
+                "mt": int(mt) if mt else None,
+            }
+        ]
+        insert_reaction_index(reac_index)
+
     else:
         for r in df["residual"].unique():
             for l in df["level_num"].unique():
@@ -192,7 +217,6 @@ def reaction_index_regist(entry_id, entry_json, react_dict, df):
                 ## if the level number is not known but the e_out (E-LVL or E-EXC) is known.
                 l = None
                 df2 = df[(df["residual"] == r) & (df["level_num"].isnull())]
-
                 for eo in df2["e_out"].unique():
                     mf, mt = get_unique_mf_mt(df2)
 
@@ -910,11 +934,16 @@ def tabulated_to_exfortables_format(id, entry_json, data_dict_conv):
     return
 
 
+
+
+
+
 def main(entnum):
     entry_json = convert_exfor_to_json(entnum)
     write_dict_to_json(entnum, entry_json)
 
     if entry_json:
+        # bib_dict(entry_json)
         try:
             bib_dict(entry_json)
         except:
@@ -962,21 +991,22 @@ def main(entnum):
 
                 entry_id = entnum + subent
 
-                tabulated_to_exfortables_format(entry_id, entry_json, data_dict_conv)
-                # try:
-                #     tabulated_to_exfortables_format(entry_id, entry_json, data_dict_conv)
+                # tabulated_to_exfortables_format(entry_id, entry_json, data_dict_conv)
+                try:
+                    tabulated_to_exfortables_format(entry_id, entry_json, data_dict_conv)
 
-                # except:
-                #     logging.error(f"Tabulated error: at ENTRY: '{entry_id}',")
+                except:
+                    logging.error(f"Tabulated error: at ENTRY: '{entry_id}',")
 
     return
 
 
 if __name__ == "__main__":
-    # ent = list_entries_from_df()
-    # entries = random.sample(ent, len(ent))
-    entries = good_example_entries
+    ent = list_entries_from_df()
+    entries = random.sample(ent, len(ent))
+    # entries = list(dict.fromkeys(good_example_entries))
+    # entries = [ "10963", "12544", "30441", "30125", ]
 
     start_time = print_time()
     logging.info(f"Start processing {start_time}")
diff --git a/src/exforparser/tabulated/data_process.py b/src/exforparser/tabulated/data_process.py
index 2fec498..cb63944 100644
--- a/src/exforparser/tabulated/data_process.py
+++ b/src/exforparser/tabulated/data_process.py
@@ -13,7 +13,7 @@
 import pandas as pd
 import re
 
-from sql.creation import insert_reaction, insert_reaction_index, insert_df_to_data
+from sql.queries import insert_df_to_data
 from submodules.utilities.elem import ztoelem
 from tabulated.data_locations import *
 from tabulated.exfor_reaction_mt import get_mf, get_mt, e_lvl_to_mt50
@@ -556,7 +556,7 @@ def process_general(entry_id, entry_json, data_dict_conv):
     ## Insert data table into exfor_data
     if not df.empty:
         insert_df_to_data(df)
-        print(df)
+        # print(df)
 
     return df
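
For reference, a minimal sketch (not part of the patch) of the bulk-insert path that process_general() now goes through via sql.queries.insert_df_to_data. The DataFrame content is illustrative only, and it assumes the exfor_data table already exists, e.g. because sql.models has been imported:

import pandas as pd
from config import engine

df = pd.DataFrame(
    [{"entry_id": "12345-002-0", "en_inc": 1.0e6, "data": 2.5e-1, "ddata": None}]
)

# Normalise NaN/None cells to None before writing, as insert_df_to_data does,
# then append the rows to the exfor_data table.
df2 = df.astype(object).where(pd.notnull(df), None)
df2.to_sql("exfor_data", engine.connect(), index=False, if_exists="append")
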