Skip to content

Commit

Permalink
Feature/v4 integration (#173)
Browse files Browse the repository at this point in the history
* add class of business detection

* Point package builder to OED4 pre-release spec file

* Set package to version 4.0.0

* Fix default for do_disaggregation to true (#169)

* OED v4 release (#170)

* branches renamed in OED repo (#172)

* test oed_v4

* pep8

* pep8

* fixup test

* pep8

---------

Co-authored-by: Sam Gamble <[email protected]>
Co-authored-by: awsbuild <[email protected]>
Co-authored-by: sambles <[email protected]>
  • Loading branch information
4 people authored Jan 30, 2025
1 parent 3c429e9 commit 9ec9758
Show file tree
Hide file tree
Showing 9 changed files with 784 additions and 28 deletions.
2 changes: 1 addition & 1 deletion ods_tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '3.2.9'
__version__ = '4.0.0'

import logging

Expand Down
590 changes: 588 additions & 2 deletions ods_tools/data/model_settings_schema.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion ods_tools/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def check(**kwargs):
"""run the check command on Exposure"""
logger = logging.getLogger(__name__)
args_set = {k for k, v in kwargs.items() if v is not None}
args_exp = set(['location', 'account', 'ri_info', 'ri_scope'])
args_exp = set(['location', 'account', 'ri_info', 'ri_scope', 'oed_dir'])

try:
if args_exp.intersection(set(args_set)):
Expand Down
4 changes: 2 additions & 2 deletions ods_tools/oed/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from .common import (
OdsException, PANDAS_COMPRESSION_MAP, PANDAS_DEFAULT_NULL_VALUES, USUAL_FILE_NAME, OED_TYPE_TO_NAME,
OED_NAME_TO_TYPE, OED_IDENTIFIER_FIELDS, VALIDATOR_ON_ERROR_ACTION, DEFAULT_VALIDATION_CONFIG, OED_PERIL_COLUMNS, fill_empty,
UnknownColumnSaveOption, BLANK_VALUES, is_empty
UnknownColumnSaveOption, BLANK_VALUES, is_empty, ClassOfBusiness
)


Expand All @@ -15,5 +15,5 @@
'AnalysisSettingHandler', 'ModelSettingHandler', 'ModelSettingSchema', 'AnalysisSettingSchema',
'OdsException', 'PANDAS_COMPRESSION_MAP', 'PANDAS_DEFAULT_NULL_VALUES', 'USUAL_FILE_NAME', 'OED_TYPE_TO_NAME',
'OED_NAME_TO_TYPE', 'OED_IDENTIFIER_FIELDS', 'VALIDATOR_ON_ERROR_ACTION', 'DEFAULT_VALIDATION_CONFIG', 'OED_PERIL_COLUMNS', 'fill_empty',
'UnknownColumnSaveOption', 'BLANK_VALUES', 'is_empty',
'UnknownColumnSaveOption', 'BLANK_VALUES', 'is_empty', 'ClassOfBusiness'
] # this is necessary for flake8 to pass, otherwise you get an unused import error
55 changes: 54 additions & 1 deletion ods_tools/oed/common.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""
common static variable and ods_tools exceptions
"""
import enum

from urllib.parse import urlparse
from pathlib import Path
import numpy as np
Expand Down Expand Up @@ -125,9 +127,60 @@ def __get__(self, obj, type=None):
'ReinsScope': ['ReinsNumber', 'PortNumber', 'AccNumber', 'LocNumber']
}


class ClassOfBusiness(enum.Enum):
    """Classes of business an OED exposure can belong to.

    The value of each member is the short upper-case code of the class.
    """

    prop = 'PROP'    # Property
    mar = 'MAR'      # Marine Cargo
    cyb = 'CYB'      # Cyber
    liabs = 'LIABS'  # Liability


# Per class-of-business settings, keyed by ClassOfBusiness member:
#   name                      - display name of the class of business
#   field_status_name         - schema field-info key giving a field's status for this class
#                               ('R' and 'n/a' are the values acted on when detecting the class)
#   subject_at_risk_source    - name of the exposure attribute holding the subject-at-risk source
#   subject_at_risk_id_fields - presumably the columns identifying one subject at risk (TODO confirm)
#   coherence_rules           - which sources must be provided:
#       type 'R'  : every source in 'r_sources' is required
#       type 'CR' : the sources in 'c_sources' go together (all or none); when any is
#                   provided, every source in the optional 'r_sources' is required too
CLASS_OF_BUSINESSES = {
    ClassOfBusiness.prop: {
        'name': 'Property',
        'field_status_name': 'Property field status',
        'subject_at_risk_source': 'location',
        'subject_at_risk_id_fields': ['PortNumber', 'AccNumber', 'LocNumber'],
        'coherence_rules': [
            {
                'name': 'location',
                'type': 'R',
                'r_sources': ['location'],
            },
            {
                'name': 'reinsurance',
                'type': 'CR',
                'c_sources': ['ri_info', 'ri_scope'],
                'r_sources': ['account'],
            },
        ],
    },
    ClassOfBusiness.mar: {
        'name': 'Marine Cargo',
        'field_status_name': 'Marine Cargo field status',
        'subject_at_risk_source': 'location',
        'subject_at_risk_id_fields': ['PortNumber', 'AccNumber', 'LocNumber'],
        'coherence_rules': [
            {
                'name': 'location',
                'type': 'R',
                'r_sources': ['location'],
            },
            {
                'name': 'reinsurance',
                'type': 'CR',
                'c_sources': ['ri_info', 'ri_scope'],
                'r_sources': ['account'],
            },
        ],
    },
    ClassOfBusiness.cyb: {
        'name': 'Cyber',
        'field_status_name': 'Cyber field status',
        'subject_at_risk_source': 'account',
        'subject_at_risk_id_fields': ['PortNumber', 'AccNumber'],
        'coherence_rules': [
            {
                'name': 'account',
                'type': 'R',
                'r_sources': ['account'],
            },
            {
                'name': 'reinsurance',
                'type': 'CR',
                'c_sources': ['ri_info', 'ri_scope'],
            },
        ],
    },
    ClassOfBusiness.liabs: {
        'name': 'Liability',
        'field_status_name': 'Liability field status',
        'subject_at_risk_source': 'account',
        'subject_at_risk_id_fields': ['PortNumber', 'AccNumber'],
        'coherence_rules': [
            {
                'name': 'account',
                'type': 'R',
                'r_sources': ['account'],
            },
            {
                'name': 'reinsurance',
                'type': 'CR',
                'c_sources': ['ri_info', 'ri_scope'],
            },
        ],
    },
}

VALIDATOR_ON_ERROR_ACTION = {'raise', 'log', 'ignore', 'return'}
DEFAULT_VALIDATION_CONFIG = [
{'name': 'source_coherence', 'on_error': 'raise'},
{'name': 'source_coherence', 'on_error': 'log'},
{'name': 'required_fields', 'on_error': 'raise'},
{'name': 'unknown_column', 'on_error': 'raise'},
{'name': 'valid_values', 'on_error': 'raise'},
Expand Down
61 changes: 58 additions & 3 deletions ods_tools/oed/exposure.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@

from .common import (PANDAS_COMPRESSION_MAP,
USUAL_FILE_NAME, OED_TYPE_TO_NAME,
UnknownColumnSaveOption)
UnknownColumnSaveOption, CLASS_OF_BUSINESSES, OdsException,
ClassOfBusiness)
from .oed_schema import OedSchema
from .source import OedSource
from .validator import Validator
Expand All @@ -30,14 +31,15 @@ class OedExposure:
"""
DEFAULT_EXPOSURE_CONFIG_NAME = 'exposure_info.json'

def __init__(self,
def __init__(self, *,
location=None,
account=None,
ri_info=None,
ri_scope=None,
oed_schema_info=None,
currency_conversion=None,
reporting_currency=None,
class_of_business=None,
check_oed=False,
use_field=False,
validation_config=None,
Expand Down Expand Up @@ -129,6 +131,8 @@ def fn(df):

self.reporting_currency = reporting_currency

self.class_of_business = class_of_business

self.validation_config = validation_config

if not working_dir:
Expand All @@ -139,6 +143,49 @@ def fn(df):
if check_oed:
self.check()

def get_class_of_business(self):
    """Detect the class of business of the exposure from the columns of its sources.

    A class of business is ruled out as soon as one of the sources misses a field
    whose status for that class is 'R', or contains a field whose status for that
    class is 'n/a'.

    Returns:
        ClassOfBusiness: the single class left after exclusion; Property when the
        schema predates OED v4 or when only Property and Marine remain.

    Raises:
        OdsException: if every class is ruled out (the message lists, per class,
            the missing and unexpected fields), or if several classes other than
            the Property/Marine pair remain.
    """
    # pre-v4 schemas carry a single 'Required Field' entry instead of one status per class
    sample_field_info = next(iter(self.get_input_fields('null').values()))
    if 'Required Field' in sample_field_info:
        logger.debug(f"OED schema version < 4.0.0, only support {ClassOfBusiness.prop}")
        return ClassOfBusiness.prop

    class_of_businesses = set(CLASS_OF_BUSINESSES)
    status_key_by_cob = {cob: CLASS_OF_BUSINESSES[cob]['field_status_name'] for cob in class_of_businesses}
    exclusion_messages = {}

    def exclude(cob, reason, field_name):
        # remember why (field 'missing' or 'present') this class of business is ruled out
        exclusion_messages.setdefault(cob, {}).setdefault(reason, []).append(field_name)

    for oed_source in self.get_oed_sources():
        present_field = {info['Input Field Name'] for info in oed_source.get_column_to_field().values()}
        for field_info in self.get_input_fields(oed_source.oed_type).values():
            field_name = field_info['Input Field Name']
            is_present = field_name in present_field
            for cob, status_key in status_key_by_cob.items():
                status = field_info.get(status_key)
                if status == 'R' and not is_present:
                    exclude(cob, 'missing', field_name)
                elif status == 'n/a' and is_present:
                    exclude(cob, 'present', field_name)

    final_cobs = class_of_businesses.difference(exclusion_messages)

    if not final_cobs:
        report = []
        for cob, messages in exclusion_messages.items():
            entry = f"{cob}:"
            if messages.get('missing'):
                entry += "\n " + ", ".join(messages['missing']) + " missing"
            if messages.get('present'):
                entry += "\n " + ", ".join(messages['present']) + " present"
            report.append(entry)
        raise OdsException("\n".join(report))

    if len(final_cobs) == 1:
        (detected,) = final_cobs
        logger.info(f"detected class of business is {detected}")
        return detected

    if final_cobs == {ClassOfBusiness.prop, ClassOfBusiness.mar}:
        # Marine and Property have mostly the same column, default to Property if undetermined
        return ClassOfBusiness.prop

    raise OdsException(f"could not determine the COB of the exposure between those {final_cobs}")

@property
def class_of_business_info(self):
    """Return the ``CLASS_OF_BUSINESSES`` entry of the exposure's class of business.

    When no class of business is set yet, it is detected with
    ``get_class_of_business`` and stored on the instance first.
    """
    cob = self.class_of_business
    if cob is None:
        cob = self.get_class_of_business()
        self.class_of_business = cob
    return CLASS_OF_BUSINESSES[cob]

@classmethod
def resolve_oed_info(cls, oed_info, df_engine):
if isinstance(oed_info, (str, Path)):
Expand Down Expand Up @@ -208,7 +255,7 @@ def find_fp(names):
kwargs['working_dir'] = oed_dir

missing_files = [file for file, found in files_found.items() if not found]
if missing_files:
if missing_files and False:
raise FileNotFoundError(f"Files not found in current path ({oed_dir}): {', '.join(missing_files)}")

return cls(**{**config, **kwargs})
Expand Down Expand Up @@ -256,6 +303,11 @@ def get_oed_sources(self):
if oed_source:
yield oed_source

def get_subject_at_risk_source(self) -> OedSource:
    """Return the source named by 'subject_at_risk_source' for the exposure's class of business.

    The class of business is detected and stored on the instance first when it is
    not set yet (done by the ``class_of_business_info`` property).
    """
    source_attribute = self.class_of_business_info['subject_at_risk_source']
    return getattr(self, source_attribute)

def save_config(self, filepath):
"""
save data to directory, loadable later on
Expand Down Expand Up @@ -327,6 +379,9 @@ def check(self, validation_config=None):
OdsException if some invalid data is found
"""
if self.class_of_business is None:
self.class_of_business = self.get_class_of_business()

if validation_config is None:
validation_config = self.validation_config
validator = Validator(self)
Expand Down
5 changes: 4 additions & 1 deletion ods_tools/oed/oed_schema.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import json
import logging
import os
from pathlib import Path
import numba as nb
import numpy as np

from .common import OdsException, BLANK_VALUES, cached_property, dtype_to_python

logger = logging.getLogger(__name__)

ENV_ODS_SCHEMA_PATH = os.getenv('ODS_SCHEMA_PATH')


Expand Down Expand Up @@ -74,6 +77,7 @@ def from_oed_schema_info(cls, oed_schema_info):
elif isinstance(oed_schema_info, cls):
return oed_schema_info
elif oed_schema_info is None:
logger.debug(f"loading default schema {cls.DEFAULT_ODS_SCHEMA_PATH}")
return cls.from_json(cls.DEFAULT_ODS_SCHEMA_PATH)
else:
raise OdsException(f"{oed_schema_info} is not a supported format to create {cls} object")
Expand Down Expand Up @@ -245,7 +249,6 @@ def is_valid_value(value, valid_ranges, allow_blanks):
Returns:
True if value is in one of the range
"""

if value in BLANK_VALUES:
return allow_blanks
for valid_range in valid_ranges:
Expand Down
38 changes: 22 additions & 16 deletions ods_tools/oed/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pathlib import Path
from collections.abc import Iterable

from .common import (OdsException, OED_PERIL_COLUMNS, OED_IDENTIFIER_FIELDS, DEFAULT_VALIDATION_CONFIG,
from .common import (OdsException, OED_PERIL_COLUMNS, OED_IDENTIFIER_FIELDS, DEFAULT_VALIDATION_CONFIG, CLASS_OF_BUSINESSES,
VALIDATOR_ON_ERROR_ACTION, BLANK_VALUES, is_empty)
from .oed_schema import OedSchema

Expand Down Expand Up @@ -92,19 +92,24 @@ def invalid_data_to_str(_data):
def check_source_coherence(self):
    """Check that the sources provided fit the exposure's class of business.

    The checks are driven only by the 'coherence_rules' of the class of business
    (read through ``exposure.class_of_business_info``, which detects the class of
    business when it is not set yet):

    * type "R": every source listed in 'r_sources' must be provided.
    * type "CR": the sources listed in 'c_sources' must be provided all together or
      not at all; when at least one is provided, every source listed in the
      optional 'r_sources' must be provided too.

    Rules of any other type are ignored. No source is required unconditionally, so
    account-only classes of business are not asked for a location file.

    Returns:
        list of dict: one entry per violation with keys 'name' (rule name),
        'source' (always None) and 'msg'.
    """
    invalid_data = []
    exposure = self.exposure

    for coherence_rule in exposure.class_of_business_info['coherence_rules']:
        rule_type = coherence_rule["type"]
        required_names = coherence_rule.get("r_sources", [])  # optional for "CR" rules
        r_sources = []

        if rule_type == "CR":
            c_sources = [getattr(exposure, source) for source in coherence_rule["c_sources"]]
            if any(c_sources):
                if not all(c_sources):
                    invalid_data.append(
                        {'name': coherence_rule['name'], 'source': None,
                         'msg': f"Exposure needs all {coherence_rule['c_sources']} for {coherence_rule['name']}"
                                f" got {c_sources}"})
                # the required sources only matter once one of the conditional sources is used
                r_sources = [getattr(exposure, source) for source in required_names]
        elif rule_type == "R":
            r_sources = [getattr(exposure, source) for source in coherence_rule["r_sources"]]

        if not all(r_sources):
            invalid_data.append({'name': coherence_rule['name'], 'source': None,
                                 'msg': f"Exposure needs {required_names}, got={r_sources}"})

    return invalid_data

Expand All @@ -126,8 +131,9 @@ def check_required_fields(self):

for field_info in input_fields.values():
if field_info['Input Field Name'] not in field_to_columns:
# OED v4 = 'Property field status' and OED v3 = 'Required Field'
requ_field_ref = 'Property field status' if 'Property field status' in field_info else 'Required Field'
requ_field_ref = CLASS_OF_BUSINESSES[self.exposure.class_of_business]['field_status_name']
if requ_field_ref not in field_info: # OED v3 only support PROP and used 'Required Field'
requ_field_ref = 'Required Field'
if field_info.get(requ_field_ref) == 'R':
invalid_data.append({'name': oed_source.oed_name, 'source': oed_source.current_source,
'msg': f"missing required column {field_info['Input Field Name']}"})
Expand Down
55 changes: 54 additions & 1 deletion tests/test_ods_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
sys.path.append(sys.path.pop(0))

from ods_tools.main import convert
from ods_tools.oed import OedExposure, OedSchema, OdsException, ModelSettingSchema, AnalysisSettingSchema, OED_TYPE_TO_NAME, UnknownColumnSaveOption
from ods_tools.oed import (OedExposure, OedSchema, OdsException, ModelSettingSchema, AnalysisSettingSchema, OED_TYPE_TO_NAME, UnknownColumnSaveOption,
ClassOfBusiness)
from ods_tools.odtf.controller import transform_format

logger = logging.getLogger(__file__)
Expand Down Expand Up @@ -115,6 +116,58 @@ def test_load_oed_from_config(self):
exposure2 = OedExposure(**config)
self.assertTrue(exposure.location.dataframe.equals(exposure2.location.dataframe))

def test_oed_V3(self):
    """An exposure checked against the OED 3.4.0 schema is detected as Property."""
    schema_url = 'https://github.com/OasisLMF/ODS_OpenExposureData/releases/download/3.4.0/OpenExposureData_Spec.json'
    with tempfile.TemporaryDirectory() as tmp_run_dir:
        schema_path = os.path.join(tmp_run_dir, 'OpenExposureData_Spec.json')
        # close the HTTP response as soon as the schema is written instead of leaving it open
        with urllib.request.urlopen(schema_url) as response, open(schema_path, 'wb') as schema_file:
            schema_file.write(response.read())

        config = {
            'location': base_url + '/SourceLocOEDPiWind.csv',
            'account': base_url + '/SourceAccOEDPiWind.csv',
            'ri_info': base_url + '/SourceReinsInfoOEDPiWind.csv',
            'ri_scope': base_url + '/SourceReinsScopeOEDPiWind.csv',
            'oed_schema_info': schema_path,
            'check_oed': True,
            'use_field': True,
        }
        assert OedExposure(**config).class_of_business == ClassOfBusiness.prop

def test_oed_cyber_example(self):
    """The OED cyber example (account file only) passes the check and is detected as Cyber."""
    oed_example_url = "https://raw.githubusercontent.com/OasisLMF/ODS_OpenExposureData/refs/heads/main/Examples"
    exposure = OedExposure(
        account=oed_example_url + '/cyber_account.csv',
        check_oed=True,
        use_field=True,
    )
    assert exposure.class_of_business == ClassOfBusiness.cyb

def test_oed_marinecargo_example(self):
    """The OED marine cargo example, once patched, is detected as Property or Marine."""
    oed_example_url = "https://raw.githubusercontent.com/OasisLMF/ODS_OpenExposureData/refs/heads/main/Examples"
    location_file = oed_example_url + '/marinecargo_location.csv'
    account_file = oed_example_url + '/marinecargo_account.csv'

    # There is an issue with the current marine example: it is loaded unchecked and
    # patched by hand first. Once the example is fixed, check it directly and remove the patch.
    unchecked = OedExposure(location=location_file, account=account_file,
                            check_oed=False, use_field=True)
    unchecked.account.dataframe["PolDedType6All"] = 1

    checked = OedExposure(location=location_file, account=unchecked.account.dataframe,
                          check_oed=True, use_field=True)
    assert checked.class_of_business in [ClassOfBusiness.prop, ClassOfBusiness.mar]

def test_oed_liability_example(self):
    """The OED liability example (account file only) passes the check and is detected as Liability."""
    oed_example_url = "https://raw.githubusercontent.com/OasisLMF/ODS_OpenExposureData/refs/heads/main/Examples"
    exposure = OedExposure(
        account=oed_example_url + '/liability_account.csv',
        check_oed=True,
        use_field=True,
    )
    assert exposure.class_of_business == ClassOfBusiness.liabs

def test_categorical_with_default(self):
# UseReinsDates is a string column with a non null default, check default setting works
with tempfile.TemporaryDirectory() as tmp_run_dir:
Expand Down

0 comments on commit 9ec9758

Please sign in to comment.