From d31f2130b1b21f8dd447721a93fb7fc88a21fab6 Mon Sep 17 00:00:00 2001 From: Shin Okumura Date: Tue, 1 Aug 2023 22:43:17 +0200 Subject: [PATCH] update package setup --- .github/workflows/auto_update.yml | 2 - .gitmodules | 3 + VERSION | 1 + config.py | 33 +---- exparser.py | 10 +- parser/exfor_bib.py | 2 +- parser/exfor_data.py | 13 +- requirements.txt | 19 ++- setup.py | 33 +++++ sql/creation.py | 37 +++--- sql/models.py | 8 +- sql/queries.py | 10 +- submodules | 1 + tabulated.py | 78 ++++++------ tabulated/data_process.py | 56 +++++---- tabulated/exfor_reaction_mt.py | 10 +- utilities/elem.py | 168 -------------------------- utilities/utilities.py | 193 ------------------------------ 18 files changed, 167 insertions(+), 510 deletions(-) create mode 100644 .gitmodules create mode 100644 VERSION create mode 100644 setup.py create mode 160000 submodules delete mode 100644 utilities/elem.py delete mode 100644 utilities/utilities.py diff --git a/.github/workflows/auto_update.yml b/.github/workflows/auto_update.yml index 5680201..0091cc0 100644 --- a/.github/workflows/auto_update.yml +++ b/.github/workflows/auto_update.yml @@ -56,9 +56,7 @@ jobs: EMAIL_RECIPIENT: ${{ secrets.EMAIL_RECIPIENT }} run: | cd ${GITHUB_WORKSPACE} - git clone https://github.com/IAEA-NDS/exfor_dictionary.git git clone https://github.com/IAEA-NDS/exfor_master.git - git clone https://github.com/IAEA-NDS/exfor_json.git python exparser.py diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..fb2c7e9 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "submodules"] + path = submodules + url = https://github.com/shinokumura/exparser-submodule.git diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..9dd28c2 --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +1.0.0beta \ No newline at end of file diff --git a/config.py b/config.py index 0df8768..c6deea7 100644 --- a/config.py +++ b/config.py @@ -9,8 +9,8 @@ # Contact: nds.contact-point@iaea.org # #################################################################### -import sys -import os +import sqlalchemy as db +from sqlalchemy.orm import sessionmaker DEVENV = True @@ -25,36 +25,13 @@ -EXFOR_DB = DATA_DIR + "exfor_tmp.sqlite" -ENDFTAB_DB = DATA_DIR + "endftables.sqlite" - - -## Package locations -EXFOR_PARSER = "exforparser/" -EXFOR_DICTIONARY = "exfor_dictionary/" -RIPL3 = "ripl3_json/" - - -sys.path.append(os.path.join(MODULES_DIR, EXFOR_DICTIONARY)) -sys.path.append(os.path.join(MODULES_DIR, RIPL3)) - - - -""" pickel file path """ -PICKLE_PATH = os.path.join(MODULES_DIR, EXFOR_DICTIONARY, "pickles/") - +EXFOR_DB = DATA_DIR + "exfor.sqlite" """ Pickle path of list of EXFOR master files made by parser.list_x4files.py""" ENTRY_INDEX_PICKLE = "pickles/entry.pickle" - -""" EXFOR master file path """ -EXFOR_MASTER_REPO_PATH = os.path.join(DATA_DIR, "exfor_master/") -EXFOR_ALL_PATH = os.path.join(EXFOR_MASTER_REPO_PATH, "exforall/") - - TO_JSON = True POST_DB = False @@ -62,10 +39,6 @@ OUT_PATH = DATA_DIR + "../../../Desktop/" -import sqlalchemy as db -from sqlalchemy.orm import sessionmaker - - """ SQL database """ engine = db.create_engine("sqlite:///" + EXFOR_DB) session = sessionmaker(autocommit=False, autoflush=True, bind=engine) diff --git a/exparser.py b/exparser.py index 0a74921..5e2b2c2 100644 --- a/exparser.py +++ b/exparser.py @@ -21,11 +21,10 @@ from config import OUT_PATH -from utilities.utilities import del_outputs, print_time, get_entry_update_date +from submodules.utilities.util import del_outputs, print_time, get_entry_update_date from parser.list_x4files import list_entries_from_df from parser.exfor_entry import Entry from parser.exfor_subentry import Subentry -#from mongodb import post_one_mongodb, post_many_mongodb ## get update data from git commit and store info to Python dictionary @@ -97,7 +96,6 @@ def convert_exfor_to_json(entnum=None): def main(entnum): - # del_outputs(OUT_PATH + "json/") start_time = print_time() logging.info(f"Start processing {start_time}") @@ -105,7 +103,6 @@ def main(entnum): try: entry_json = convert_exfor_to_json(entnum) write_dict_to_json(entnum, entry_json) - # post_one_mongodb("exfor_json", entry_json) except: logging.error(f"ERROR: at ENTRY: {entnum}") @@ -114,10 +111,9 @@ def main(entnum): if __name__ == "__main__": - # ent = list_entries_from_df() + ent = list_entries_from_df() # entries = random.sample(ent, len(ent)) - # entries = ent - entries=["32617", "40016", "30936", "C1823", "E2286"] #, "12240", "41185", "41102", "30010", "11210"] + entries = ent for entnum in entries: print(entnum) main(entnum) diff --git a/parser/exfor_bib.py b/parser/exfor_bib.py index 4c4ea75..04cd993 100644 --- a/parser/exfor_bib.py +++ b/parser/exfor_bib.py @@ -16,7 +16,7 @@ from .exfor_field import * -from utilities.utilities import flatten_list +from submodules.utilities.util import flatten_list from .exfor_block import get_identifier_details diff --git a/parser/exfor_data.py b/parser/exfor_data.py index 70cc5de..0f105e3 100644 --- a/parser/exfor_data.py +++ b/parser/exfor_data.py @@ -9,17 +9,24 @@ # Contact: nds.contact-point@iaea.org # #################################################################### -import sys from pyparsing import * from .exfor_field import data_header -from utilities.utilities import corr, flatten_list -from utilities.elem import ztoelem, numtoisomer +from submodules.utilities.util import flatten_list +from submodules.utilities.elem import ztoelem # from dictionary.exfor_dictionary import Diction # sys.path.append("../exfor_dictionary/") from exfor_dictionary import Diction +def corr(invalue): + if re.search(r"\d|\.[+]\d", invalue): + # invalue = invalue.replace("+", "E+") + invalue = re.sub(r"(\d|\.)([+])(\d)", r"\1E+\3", invalue) + if re.search(r"\d|\.[-]\d", invalue): + # invalue = invalue.replace("-", "E-") + invalue = re.sub(r"(\d|\.)([-])(\d)", r"\1E-\3", invalue) + return invalue def data_column_read(line): column = [0, 11, 22, 33, 44, 55] diff --git a/requirements.txt b/requirements.txt index 291a598..f673513 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,8 @@ -requests -dnspython -matplotlib -numpy -pandas -plotly -pymongo==4.1.1 -pymongo[srv] -ripl3_json @ https://github.com/shinokumura/ripl3_json.git -exfor_master @ https://github.com/IAEA-NDS/exfor_master.git -exfor_dictionary @ https://github.com/IAEA-NDS/exfor_dictionary.git +common==0.1.2 +numpy==1.23.0 +pandas==2.0.3 +pyparsing==3.1.1 +setuptools==57.5.0 +SQLAlchemy==2.0.18 +exfor_dictionary @ git+https://github.com/IAEA-NDS/exfor_dictionary@main +ripl3_json @ git+https://github.com/shinokumura/ripl3_json@main \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..c319b7c --- /dev/null +++ b/setup.py @@ -0,0 +1,33 @@ +from setuptools import setup, find_packages + + +def read_requirements(file): + with open(file) as f: + return f.read().splitlines() + +def read_file(file): + with open(file) as f: + return f.read() + +version = read_file("VERSION") +requirements = read_requirements("requirements.txt") + +setup( + name="exforparser", + description="EXFOR Parser", + packages=find_packages(exclude=["test"]), + version=version, + author="Shin Okumura/IAEA-NDS", + author_email="s.okumura@iaea.org", + maintainer="IAEA-NDS", + maintainer_email="nds.contact-point@iaea.org", + license="GPL-2.0 license", + url="https://github.com/s.okumura@iaea.org/exforparser", + python_requires=">=3.8", + install_requires=requirements, + classifiers=[ + "Programming Language :: Python :: 3", + "Operating System :: OS Independent", + "License :: OSI Approved :: GPL-2.0 license", + ], +) diff --git a/sql/creation.py b/sql/creation.py index b4902da..13efeca 100644 --- a/sql/creation.py +++ b/sql/creation.py @@ -38,7 +38,7 @@ def drop_tables(): "exfor_reactions", metadata, db.Column("entry_id", db.String(255), primary_key=True, index=True), - db.Column("entry", db.String(255)), + db.Column("entry", db.String(5)), db.Column("target", db.String(255), index=True), db.Column("projectile", db.String(255)), db.Column("process", db.String(255), index=True), @@ -56,24 +56,24 @@ def drop_tables(): "exfor_index", metadata, db.Column("id", db.Integer(), primary_key=True), - db.Column("entry_id", db.String(255), index=True), - db.Column("entry", db.String(255)), - db.Column("target", db.String(255), index=True), - db.Column("projectile", db.String(255)), - db.Column("process", db.String(255), index=True), - db.Column("sf4", db.String(255), index=True), - db.Column("residual", db.String(255), index=True), + db.Column("entry_id", db.String(20), index=True), + db.Column("entry", db.String(5)), + db.Column("target", db.String(20), index=True), + db.Column("projectile", db.String(20)), + db.Column("process", db.String(40), index=True), + db.Column("sf4", db.String(20), index=True), + db.Column("residual", db.String(20), index=True), db.Column("level_num", db.Integer(), index=True), db.Column("e_out", db.Float(), index=True), db.Column("e_inc_min", db.Float(), index=True), db.Column("e_inc_max", db.Float(), index=True), db.Column("points", db.Integer()), db.Column("arbitrary_data", db.Boolean(), index=True), - db.Column("sf5", db.String(255)), - db.Column("sf6", db.String(255), index=True), - db.Column("sf7", db.String(255)), - db.Column("sf8", db.String(255)), - db.Column("sf9", db.String(255)), + db.Column("sf5", db.String(10)), + db.Column("sf6", db.String(10), index=True), + db.Column("sf7", db.String(10)), + db.Column("sf8", db.String(10)), + db.Column("sf9", db.String(10)), db.Column("x4_code", db.String(255)), db.Column("mf", db.Integer(), index=True), db.Column("mt", db.Integer(), index=True), @@ -84,13 +84,13 @@ def drop_tables(): "exfor_data", metadata, db.Column("id", db.Integer(), primary_key=True), - db.Column("entry_id", db.String(255), index=True), + db.Column("entry_id", db.String(20), index=True), db.Column("en_inc", db.Float()), db.Column("den_inc", db.Float()), db.Column("charge", db.Float()), - db.Column("mass", db.Float()), - db.Column("isomer", db.String(255)), - db.Column("residual", db.String(255), index=True), + db.Column("mass", db.String(20)), # must be string for CMP cases + db.Column("isomer", db.String(20)), + db.Column("residual", db.String(20), index=True), db.Column("level_num", db.Integer(), index=True), db.Column("data", db.Float()), db.Column("ddata", db.Float()), @@ -110,13 +110,12 @@ def drop_tables(): def insert_df_to_data(df): df2 = df.astype(object).where(pd.notnull(df), None) - # print(df2.to_dict(orient='records')) - # list = [] for record in df2.to_dict(orient="records"): query = db.insert(exfor_data).values(record) ResultProxy = connection.execute(query) + def insert_bib(dictlist): connection.execute(exfor_bib.insert(), dictlist) diff --git a/sql/models.py b/sql/models.py index 3803179..f6f4d10 100644 --- a/sql/models.py +++ b/sql/models.py @@ -56,7 +56,7 @@ class Exfor_Indexes(Base): e_inc_min = db.Column(db.Float) e_inc_max = db.Column(db.Float) points = db.Column(db.Integer) - # arbitrary_data = db.Column(db.Boolean) + arbitrary_data = db.Column(db.Boolean) sf5 = db.Column(db.String) sf6 = db.Column(db.String) sf7 = db.Column(db.String) @@ -73,14 +73,14 @@ class Exfor_Data(Base): en_inc = db.Column(db.String) den_inc = db.Column(db.String) charge = db.Column(db.Float) - mass = db.Column(db.Float) + mass = db.Column(db.String) isomer = db.Column(db.String) residual = db.Column(db.String) level_num = db.Column(db.Integer) data = db.Column(db.Float) ddata = db.Column(db.Float) - # arbitrary_data = db.Column(db.Boolean) - # arbitrary_ddata = db.Column(db.Boolean) + arbitrary_data = db.Column(db.Boolean) + arbitrary_ddata = db.Column(db.Boolean) e_out = db.Column(db.Float) de_out = db.Column(db.Float) angle = db.Column(db.Float) diff --git a/sql/queries.py b/sql/queries.py index f47253b..50841eb 100644 --- a/sql/queries.py +++ b/sql/queries.py @@ -50,7 +50,8 @@ def reaction_query_simple(type, elem, mass, reaction, branch): queries = [Exfor_Indexes.target == target, Exfor_Indexes.process == reaction.upper(), - Exfor_Indexes.sf6 == type.upper()] + Exfor_Indexes.sf6 == type.upper(), + Exfor_Indexes.arbitrary_data == False] @@ -81,7 +82,8 @@ def reaction_query(type, elem, mass, reaction, branch=None, rp_elem=None, rp_mas reac = None target = elemtoz_nz(elem) + "-" + elem.upper() + "-" + mass - queries = [Exfor_Indexes.target == target] + queries = [Exfor_Indexes.target == target, + Exfor_Indexes.arbitrary_data == False] if branch: if branch == "PAR": @@ -294,6 +296,7 @@ def reaction_query_fy(type, elem, mass, reaction, branch, mesurement_opt_fy, ene Exfor_Indexes.sf6 == type.upper(), Exfor_Indexes.e_inc_min >= lower, Exfor_Indexes.e_inc_max <= upper, + Exfor_Indexes.arbitrary_data == False, (Exfor_Indexes.sf4 == "MASS" if mesurement_opt_fy=="A" else Exfor_Indexes.sf4 == "ELEM" if mesurement_opt_fy=="Z" else Exfor_Indexes.sf4.isnot(None) ), ).all() @@ -326,7 +329,8 @@ def reaction_query_fission(type, elem, mass, reaction, branch, energy_range): target = elemtoz_nz(elem) + "-" + elem.upper() + "-" + mass queries = [Exfor_Indexes.target == target, - Exfor_Indexes.process == reaction.upper(),] + Exfor_Indexes.process == reaction.upper(), + Exfor_Indexes.arbitrary_data == False] if branch == "nu_n": sf5 = ["PR"] diff --git a/submodules b/submodules new file mode 160000 index 0000000..6401183 --- /dev/null +++ b/submodules @@ -0,0 +1 @@ +Subproject commit 64011838b02dbeb752b857aec9dd8fdc589e6f92 diff --git a/tabulated.py b/tabulated.py index b84b308..80873a4 100644 --- a/tabulated.py +++ b/tabulated.py @@ -125,6 +125,7 @@ def reaction_dict_regist(entry_id, entry_json): } ] insert_reaction(reac_data) + print(reac_data) return @@ -168,6 +169,7 @@ def reaction_index_regist(entry_id, entry_json, react_dict, df): "e_inc_min": None, "e_inc_max": None, "points": None, + "arbitrary_data": None, "sf5": react_dict["sf5"], "sf6": react_dict["sf6"], "sf7": react_dict["sf7"], @@ -207,6 +209,7 @@ def reaction_index_regist(entry_id, entry_json, react_dict, df): "e_inc_min": df2["en_inc"].min(), "e_inc_max": df2["en_inc"].max(), "points": len(df2.index), + "arbitrary_data": df2["arbitrary_data"].unique()[0], "sf5": react_dict["sf5"], "sf6": react_dict["sf6"], "sf7": react_dict["sf7"], @@ -244,6 +247,7 @@ def reaction_index_regist(entry_id, entry_json, react_dict, df): "e_inc_min": df2["en_inc"].min(), "e_inc_max": df2["en_inc"].max(), "points": len(df2.index), + "arbitrary_data": df2["arbitrary_data"].unique()[0], "sf5": react_dict["sf5"], "sf6": react_dict["sf6"], "sf7": react_dict["sf7"], @@ -289,6 +293,9 @@ def tabulated_to_exfortables_format(id, entry_json, data_dict_conv): df = process_general(entry_id, entry_json, data_dict_conv) reaction_index_regist(entry_id, entry_json, react_dict, df) + ## If the DATA is given by arbitrary unit (ARB-UNIT) or no dimension (NO-DIM) + df = df.loc[df["arbitrary_data"] == 0 ] + if df.empty: continue @@ -386,13 +393,16 @@ def tabulated_to_exfortables_format(id, entry_json, data_dict_conv): # reaction_index_regist(entry_id, entry_json, react_dict, pd.DataFrame()) continue - if len(df["level_num"].unique()) == 0: + if len(df["level_num"].unique()) == 0 or not df["level_num"].unique().all(): continue for level_num in df["level_num"].unique(): df2 = df[df["level_num"] == level_num] - mf, mt = get_unique_mf_mt(df2) + if df2.empty: + continue + + mf, mt = get_unique_mf_mt(df2) dir = get_dir_name(react_dict, level_num=level_num, subdir=None) filename = exfortables_filename( dir, @@ -412,6 +422,7 @@ def tabulated_to_exfortables_format(id, entry_json, data_dict_conv): + react_dict["target"].split("-")[3].lower() ), ) + write_to_exfortables_format_sig( entry_id, dir, @@ -453,7 +464,7 @@ def tabulated_to_exfortables_format(id, entry_json, data_dict_conv): if ( react_dict["target"].split("-")[2] == "0" - and react_dict["sf4"] is None + or react_dict["sf4"] is None ): ## case for ,DA without product filename = exfortables_filename( @@ -665,26 +676,29 @@ def tabulated_to_exfortables_format(id, entry_json, data_dict_conv): ) else: - prod = react_dict["sf4"] ## expect MASS, then format should be like FPY - filename = exfortables_filename( - dir, - entry_id, - react_dict["process"].replace(",", "-").lower(), - react_dict, - entry_json["bib_record"], - None, - prod, - ) - - write_to_exfortables_format_fy( - entry_id, - dir, - filename, - entry_json["bib_record"], - react_dict, - str(mf) + " - " + str(mt), - df, - ) + continue + ## expect MASS or ELEM/MASS, then format should be like FPY + ## not output + # prod = react_dict["sf4"] + # filename = exfortables_filename( + # dir, + # entry_id, + # react_dict["process"].replace(",", "-").lower(), + # react_dict, + # entry_json["bib_record"], + # None, + # prod, + # ) + + # write_to_exfortables_format_fy( + # entry_id, + # dir, + # filename, + # entry_json["bib_record"], + # react_dict, + # str(mf) + " - " + str(mt), + # df, + # ) elif ( react_dict["sf6"] == "NU/DE" or react_dict["sf6"] == "FY/DE" @@ -896,7 +910,6 @@ def tabulated_to_exfortables_format(id, entry_json, data_dict_conv): def main(entnum): - # entry_json = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))) entry_json = convert_exfor_to_json(entnum) write_dict_to_json(entnum, entry_json) @@ -948,27 +961,20 @@ def main(entnum): entry_id = entnum + subent - # tabulated_to_exfortables_format(entry_id, entry_json, data_dict_conv) - try: - tabulated_to_exfortables_format(entry_id, entry_json, data_dict_conv) + tabulated_to_exfortables_format(entry_id, entry_json, data_dict_conv) + # try: + # tabulated_to_exfortables_format(entry_id, entry_json, data_dict_conv) - except: - logging.error(f"Tabulated error: at ENTRY: '{entry_id}',") + # except: + # logging.error(f"Tabulated error: at ENTRY: '{entry_id}',") return if __name__ == "__main__": - # failed = ["13317", "14369", "40106", "D5081", "13277", "D0487", "14625", "32791", "12709", "13482", "F1099", "41075", "33120", "30751", "33028", "C1581", "F0114", "13480", "40206", "13385", "23444", "13332", "A0095", "13458", "23271", "D0847", "D8036", "G0066", "L0105", "F0332", "10722", "33119", "13396", "D8036", "33160", "40545", "21495", "13072", "32632", "23213", "C0969" ] ent = list_entries_from_df() entries = random.sample(ent, len(ent)) - # entries = failed - # entries=["D6274","D0193","20905", "40016", "22100", "T0243", "12240", "41185", "41102", "30010", "11210"] - # entries = good_example_entries - - # drop_tables() - # del_outputs(OUT_PATH + "exfortables/") start_time = print_time() logging.info(f"Start processing {start_time}") diff --git a/tabulated/data_process.py b/tabulated/data_process.py index 05e2e00..83e4e07 100644 --- a/tabulated/data_process.py +++ b/tabulated/data_process.py @@ -11,15 +11,15 @@ # #################################################################### import pandas as pd -import numpy as np import re from sql.creation import insert_reaction, insert_reaction_index, insert_df_to_data -from utilities.elem import ztoelem -from .data_locations import * +from submodules.utilities.elem import ztoelem +from tabulated.data_locations import * from tabulated.exfor_reaction_mt import get_mf, get_mt, e_lvl_to_mt50 from ripl3_json.ripl3_descretelevel import RIPL_Level + def limit_data_dict_by_locs(locs, data_dict): new = {} for key, val in data_dict.items(): @@ -34,7 +34,6 @@ def limit_data_dict_by_locs(locs, data_dict): def data_length_unify(data_dict): data_len = [] - new_list = [] data_list = data_dict["data"] for i in data_list: @@ -68,8 +67,6 @@ def evaluated_data_points(**kwargs): if not kwargs[col]: del kwargs[col] - # print(kwargs) - for col in kwargs: for dl in range(len(kwargs[col])): @@ -92,13 +89,11 @@ def evaluated_data_points(**kwargs): # else: # new_x.append(None) # break - # print(a) else: a = kwargs[col][dl] new_x.append(a) - # print(dl, a, new_x) assert len(kwargs[col]) == len(new_x) @@ -164,8 +159,6 @@ def get_average(sf4, data_dict_conv): x_min=x_min, x_max=x_max, ) - # print(new_x) - if not new_x: x_temp = {} @@ -177,6 +170,8 @@ def get_average(sf4, data_dict_conv): return new_x + + def process_general(entry_id, entry_json, data_dict_conv): entnum, subent, pointer = entry_id.split("-") @@ -221,7 +216,7 @@ def process_general(entry_id, entry_json, data_dict_conv): ## get data column position ## --------------------------------------------------------------------------------------- ## locs["locs_y"], locs["locs_dy"] = get_y_locs(data_dict_conv) - # print(locs) + if not locs["locs_y"]: locs["locs_y"], locs["locs_dy"] = get_y_locs_by_pointer(pointer, data_dict_conv) @@ -234,7 +229,7 @@ def process_general(entry_id, entry_json, data_dict_conv): ) if locs["locs_y"]: - if data_dict_conv["units"][locs["locs_y"][0]] == "ARB-UNITS": + if any(data_dict_conv["units"][locs["locs_y"][0]] == arb for arb in ("ARB-UNITS", "NO-DIM")): data_unit_flag = True @@ -245,9 +240,10 @@ def process_general(entry_id, entry_json, data_dict_conv): for y, dy in zip(data, data_dict_conv["data"][locs["locs_dy"][0]]) ] - elif data_dict_conv["units"][locs["locs_dy"][0]] == "ARB-UNITS": + elif any(data_dict_conv["units"][locs["locs_dy"][0]] == arb for arb in ("ARB-UNITS", "NO-DIM")): ddata_unit_flag = True + elif ( data_dict_conv["units"][locs["locs_y"][0]] == data_dict_conv["units"][locs["locs_dy"][0]] @@ -281,6 +277,7 @@ def process_general(entry_id, entry_json, data_dict_conv): residual = [prod] * len(data_dict_conv["data"][locs["locs_y"][0]]) + elif any(i == react_dict["sf4"] for i in ("ELEM", "MASS", "ELEM/MASS")): ## get positions @@ -387,6 +384,7 @@ def process_general(entry_id, entry_json, data_dict_conv): "EN", limit_data_dict_by_locs(locs["locs_en"], data_dict_conv) ) ] + else: ## e.g. ## S0102002 (14-SI-0(4-BE-9,NON),,SIG) 単位がMEV/A @@ -424,18 +422,20 @@ def process_general(entry_id, entry_json, data_dict_conv): if not locs["locs_e"] and not locs["locs_de"]: + ### if there is no outgoing energy column, it means that the reaction is not partial e_out = [None] * len(data) de_out = [None] * len(data) mt = [get_mt(react_dict)] * len(data) if locs["locs_e"] and data_dict_conv["heads"][locs["locs_e"][0]] == "LVL-NUMB": + ## take the first column of LVL-NUMB if there are some level_num = [int(l) for l in data_dict_conv["data"][locs["locs_e"][0]]] mt = [e_lvl_to_mt50(l) for l in level_num] e_out = [None] * len(data) - elif len(locs["locs_e"]) == 1 and data_dict_conv["heads"][ + if len(locs["locs_e"]) == 1 and data_dict_conv["heads"][ locs["locs_e"][0] ].startswith("E"): e_out = [ @@ -445,20 +445,24 @@ def process_general(entry_id, entry_json, data_dict_conv): elif len(locs["locs_e"]) > 1: - e_out = [ - e / 1e6 if e is not None else None - for e in get_average( - "E", limit_data_dict_by_locs(locs["locs_e"], data_dict_conv) - ) - ] - + ## get average + # e_out = [ + # e / 1e6 if e is not None else None + # for e in get_average( + # "E", limit_data_dict_by_locs(locs["locs_e"], data_dict_conv) + # ) + # ] + ## should take the lowest one, but temporary take the first one + e_out = [e / 1e6 if e is not None else None for e in data_dict_conv["data"][locs["locs_e"][0]]] if ( not react_dict["target"].endswith("-0") and react_dict["sf5"] == "PAR" and react_dict["sf4"] + and not any(p == react_dict["sf4"] for p in ("0-NN-1", "0-G-0")) and all(eo is not None for eo in e_out) and not level_num + and all(t is None for t in mt) ): for e_lvl in e_out: @@ -472,6 +476,9 @@ def process_general(entry_id, entry_json, data_dict_conv): mt += [e_lvl_to_mt50(L.ripl_find_level_num())] assert len(level_num) == len(e_out) + elif not all(t is None for t in mt): + pass + else: level_num = [None] * len(data) mt = [None] * len(data) @@ -520,7 +527,6 @@ def process_general(entry_id, entry_json, data_dict_conv): ## Data Process ## --------------------------------------------------------------------------------------- ## - df = pd.DataFrame( { "entry_id": entry_id, @@ -544,15 +550,13 @@ def process_general(entry_id, entry_json, data_dict_conv): } ) - # df = df[~df["data"].isna()] + df = df[~df["data"].isna()] # df = df[~df["en_inc"].isna()] - # print(df) - - ## Insert data table into exfor_data if not df.empty: insert_df_to_data(df) + print(df) return df diff --git a/tabulated/exfor_reaction_mt.py b/tabulated/exfor_reaction_mt.py index 4f2a661..c367c6e 100644 --- a/tabulated/exfor_reaction_mt.py +++ b/tabulated/exfor_reaction_mt.py @@ -120,7 +120,6 @@ def mt_to_reaction(): - def get_mf(react_dict): if sf_to_mf.get(react_dict["sf6"]): @@ -143,16 +142,13 @@ def get_mf(react_dict): def get_mt(react_dict): if react_dict["sf6"] == "FY": - return int( mt_fy_sf5[ react_dict["sf5"] ]["mt"] ) if react_dict["sf5"] and mt_fy_sf5[ react_dict["sf5"] ]["mt"] else None + return int( mt_fy_sf5[ react_dict["sf5"] ]["mt"] ) if react_dict["sf5"] and mt_fy_sf5.get( react_dict["sf5"] ) else None elif react_dict["sf6"] == "NU": - return int( mt_nu_sf5[ react_dict["sf5"] ]["mt"] ) if react_dict["sf5"] and mt_nu_sf5[ react_dict["sf5"] ]["mt"] else None + return int( mt_nu_sf5[ react_dict["sf5"] ]["mt"] ) if react_dict["sf5"] and mt_nu_sf5.get( react_dict["sf5"] ) and mt_nu_sf5[ react_dict["sf5"] ]["mt"] else None else: - if ( - react_dict["process"].split(",")[0] == "N" - and react_dict["process"].split(",")[1] == "INL" - ): + if react_dict["process"] == "N,INL": return 4 elif ( diff --git a/utilities/elem.py b/utilities/elem.py deleted file mode 100644 index 1167853..0000000 --- a/utilities/elem.py +++ /dev/null @@ -1,168 +0,0 @@ -#################################################################### -# -# This file is part of exfor-parser. -# Copyright (C) 2022 International Atomic Energy Agency (IAEA) -# -# Disclaimer: The code is still under developments and not ready -# to use. It has been made public to share the progress -# among collaborators. -# Contact: nds.contact-point@iaea.org -# -#################################################################### -ELEMS = [ - "H", - "He", - "Li", - "Be", - "B", - "C", - "N", - "O", - "F", - "Ne", - "Na", - "Mg", - "Al", - "Si", - "P", - "S", - "Cl", - "Ar", - "K", - "Ca", - "Sc", - "Ti", - "V", - "Cr", - "Mn", - "Fe", - "Co", - "Ni", - "Cu", - "Zn", - "Ga", - "Ge", - "As", - "Se", - "Br", - "Kr", - "Rb", - "Sr", - "Y", - "Zr", - "Nb", - "Mo", - "Tc", - "Ru", - "Rh", - "Pd", - "Ag", - "Cd", - "In", - "Sn", - "Sb", - "Te", - "I", - "Xe", - "Cs", - "Ba", - "La", - "Ce", - "Pr", - "Nd", - "Pm", - "Sm", - "Eu", - "Gd", - "Tb", - "Dy", - "Ho", - "Er", - "Tm", - "Yb", - "Lu", - "Hf", - "Ta", - "W", - "Re", - "Os", - "Ir", - "Pt", - "Au", - "Hg", - "Tl", - "Pb", - "Bi", - "Po", - "At", - "Rn", - "Fr", - "Ra", - "Ac", - "Th", - "Pa", - "U", - "Np", - "Pu", - "Am", - "Cm", - "Bk", - "Cf", - "Es", - "Fm", - "Md", - "No", - "Lr", - "Rf", - "Db", - "Sg", - "Bh", - "Hs", - "Mt", - "Ds", - "Rg", - "112", - "113", - "114", - "115", - "116", - "117", - "118", - "119", - "120", -] - - -def ztoelem(z): - if z == 0: - elem_name = "n" - else: - try: - z = z - 1 - elem_name = ELEMS[z] - except ValueError: - elem_name = "" - # print(elem_name.capitalize()) - # return elem_name.upper() - return elem_name.capitalize() - - -def elemtoz(elem): - try: - z = ELEMS.index(elem) - z = z + 1 - z = str(z).zfill(3) - except ValueError: - z = "" - return z - - -def numtoisomer(num): - if num == "0": - return "G" - elif num == "1": - return "M1" - elif num == "2": - return "M2" - elif num == "3": - return "M3" diff --git a/utilities/utilities.py b/utilities/utilities.py deleted file mode 100644 index 8f33273..0000000 --- a/utilities/utilities.py +++ /dev/null @@ -1,193 +0,0 @@ -#################################################################### -# -# This file is part of exfor-parser. -# Copyright (C) 2022 International Atomic Energy Agency (IAEA) -# -# Disclaimer: The code is still under developments and not ready -# to use. It has been made public to share the progress -# among collaborators. -# Contact: nds.contact-point@iaea.org -# -#################################################################### -import os -import shutil -import time -import json -import sys -import os -import re -import itertools -import json -import time -import os - - -sys.path.append("../") -from config import OUT_PATH, EXFOR_MASTER_REPO_PATH - - -def corr(invalue): - if re.search(r"\d|\.[+]\d", invalue): - # invalue = invalue.replace("+", "E+") - invalue = re.sub(r"(\d|\.)([+])(\d)", r"\1E+\3", invalue) - if re.search(r"\d|\.[-]\d", invalue): - # invalue = invalue.replace("-", "E-") - invalue = re.sub(r"(\d|\.)([-])(\d)", r"\1E-\3", invalue) - return invalue - - -def extract_key(v): - return v[0] - - -def combine_dict(d1, d2): - return { - k: list(d[k] for d in (d1, d2) if k in d) - for k in set(d1.keys()) | set(d2.keys()) - } - - -def dict_merge(dicts_list): - d = {**dicts_list[0]} - for entry in dicts_list[1:]: - # print("entry:", entry) - for k, v in entry.items(): - d[k] = ( - [d[k], v] - if k in d and type(d[k]) != list - else [*d[k] + v] - if k in d - else v - ) - return d - - -def get_key_from_value(d, val): - keys = [k for k, v in d.items() if v == val] - if keys: - return keys[0] - return None - - -def check_list(init_list): - print(init_list) - # print(any(isinstance(i, list) for i in init_list)) - - def _is_list_instance(init_list): - print(isinstance(init_list, list)) - - sub_list = flatten_list(init_list) - _is_list_instance(sub_list) - - return isinstance(init_list, list) - - -def flatten(xs): - from collections.abc import Iterable - - for x in xs: - if isinstance(x, Iterable) and not isinstance(x, (str, bytes)): - yield from flatten(x) - else: - yield x - - -def flatten_list(list): - return [item for sublist in list for item in sublist] - - -def flatten_list_original(list): - # https://geekflare.com/flatten-list-python/ - # flat_list = itertools.chain(*list) - return itertools.chain(*list) - - -def toJSON(self): - return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4) - - -def is_pointer(dict, key): - if len(dict[key].keys()) == 1: - return False - else: - return True - - -def conv_unit(value, factor): - return value * factor - - -def reaction_to_mtmf(process, sf5, sf6): - print(process, sf5, sf6) - - -def slices(s, *args): - position = 0 - for length in args: - yield s[position : position + length] - position += length - - -def del_file(fname): - os.remove(fname) - - -def del_outputs(name): - - path = os.path.join(OUT_PATH, name) - - if os.path.exists(path): - shutil.rmtree(path) - - os.mkdir(path) - - -def rescue(processed): - lines = [] - if os.path.exists("processed.dat"): - with open("processed.dat") as f: - lines = f.readlines() - if processed in "".join(lines): - return True - - else: - with open(r"processed_id.dat", "a") as fp: - fp.write(processed + "\n") - return False - - else: - with open(r"processed_id.dat", "a") as fp: - fp.write(processed + "\n") - return False - - -def process_time(func): - """ - for debugging purpose, delete @decorator - """ - - def inner(*args): - start_time = time.time() - func(*args) - print(str(func), "--- %s seconds ---" % (time.time() - start_time)) - - return inner - - -def print_time(start_time=None): - if start_time: - str = "--- %s seconds ---" % (time.time() - start_time) - return str - - else: - return time.time() - - -def get_entry_update_date(): - d = {} - file = os.path.join(EXFOR_MASTER_REPO_PATH, "entry_updatedate.dat") - with open(file) as f: - for line in f: - x = line.split(" ") - d.update({x[0]: {"last_update": x[1], "revisions": x[2].strip()}}) - return d