Skip to content

Commit ad0ebc1

Browse files
committed
add flat_mean utility function
1 parent f402539 commit ad0ebc1

File tree

5 files changed

+37
-7
lines changed

5 files changed

+37
-7
lines changed

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,7 @@ We also build on top of many great packages. Please check them out!
650650
- [Revealing the Galaxy-Halo Connection Through Machine Learning](https://arxiv.org/pdf/2204.10332.pdf)
651651
- [How the Galaxy–Halo Connection Depends on Large-Scale Environment](https://arxiv.org/pdf/2402.07995.pdf)
652652
- [Explainable Artificial Intelligence for COVID-19 Diagnosis Through Blood Test Variables](https://link.springer.com/content/pdf/10.1007/s40313-021-00858-y.pdf)
653+
- [A diagnostic support system based on interpretable machine learning and oscillometry for accurate diagnosis of respiratory dysfunction in silicosis](https://www.biorxiv.org/content/10.1101/2025.01.08.632001v1.full.pdf)
653654
- [Using Explainable Boosting Machines (EBMs) to Detect Common Flaws in Data](https://link.springer.com/chapter/10.1007/978-3-030-93736-2_40)
654655
- [Differentially Private Gradient Boosting on Linear Learners for Tabular Data Analysis](https://assets.amazon.science/fa/3a/a62ba73f4bbda1d880b678c39193/differentially-private-gradient-boosting-on-linear-learners-for-tabular-data-analysis.pdf)
655656
- [Differentially private and explainable boosting machine with enhanced utility](https://www.sciencedirect.com/science/article/abs/pii/S0925231224011950)
@@ -659,6 +660,7 @@ We also build on top of many great packages. Please check them out!
659660
- [Towards Cleaner Cities: Estimating Vehicle-Induced PM2.5 with Hybrid EBM-CMA-ES Modeling](https://www.mdpi.com/2305-6304/12/11/827)
660661
- [Using machine learning to assist decision making in the assessment of mental health patients presenting to emergency departments](https://journals.sagepub.com/doi/full/10.1177/20552076241287364)
661662
- [Proposing an inherently interpretable machine learning model for shear strength prediction of reinforced concrete beams with stirrups](https://pdf.sciencedirectassets.com/287527/1-s2.0-S2214509523X00035/1-s2.0-S2214509524005011/main.pdf?X-Amz-Security-Token=IQoJb3JpZ2luX2VjECUaCXVzLWVhc3QtMSJGMEQCIB0r0KsYBZufOjbCVtUtozwn1QKMdLt2tbbfhuJKjWlXAiB5Dfr7p0yyj%2FSfypTLmjPL8WbjGAB3tRACFjyyqQbbfiq8BQiu%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F8BEAUaDDA1OTAwMzU0Njg2NSIMqBpZ2HmN91c%2BJPqpKpAFZtvqQjCScZa4FN%2FeubsPzOk5c%2B58LliO4Zr%2Bn1pm3vtW4I9I1vA29pkhT5was1N3ccPPIm2jNLwJ%2FHiZej7A2SmFv13Ro3sTvhqG%2F6A9Xx70Nx9jOlDPJUmCypKadKp0FGfuhZQuxeN0b%2F1QUUQZG4RpxC%2FXorRRHmb%2FrXcOWBwu4PmLZAkWmTKpncjDI7oj8eh8yBe6%2FA3JkJ14ZyBgR7JnPzR2ZqMdIhvlKoyMn6EnL1Azq2y3qwEMdzSCvz3wH3sT4pClc2vPs6ruQS4CdT3E7BHrf42Q0VnUXWjuy7gt9iRr0vaWR3tD%2FxyrrEKw7XuMHO9L4rQ4Pfn1dhGZ2J8H5ocwJGSh13U5fY6noyaTNViqvHx1oHNMWL03QpkJxmUxYquBWepcDjxEc32V6eGF7Ecm8Vij3s20wdRNcHqxGFKlUCgph48CKUA79iwSGQCkWQh7bq%2FTtowTbSPud7l8xeG1MvfIVy%2B6yzrjqygvPBQs3qkvdoWUrKXe57bhr2jEkKlSdYyp2TJMD6yoYRdTPyFx5xb0KgIt6KQTPmfbqYXkd3FFz3uc0HmWC5NQz6qP9UzNcBhcK8dXo3Dw042pl0HLO1njFaa%2BBfbT89VUVUIqjrAcmHweIl1v7Eyldzr%2BGBXIlsxPO3gPzyPLF2LTggc6dA%2Bswxmgmkv%2B7n5pU5%2F5sxvEhemb%2Fqu%2B8d47O%2Bn6RH8fL4eLGGL2d0dvFvyE7gEwt%2BaU9HsIN0IHqyH5VmaTF5zaKy%2Fn%2BhkF8yGpe5Hq5yNOUGrfQgfyFn4Kqd%2FTVajxIFzk8DEY%2F%2FFtyGJ%2B8BrHV4P%2FYs8R4XcBzPQtyrTuUC1CGmF01Tc2gnnEo4pVPaIjfBk9B%2BXVMc3Mu4Ywy4L%2BsgY6sgFK3hFIXjIfoVjqrIlBvsGYaFiZB1bVKBVy3DRiBgozzYmIVhipN%2FS%2BPok1oETqvYVvLqEVkGcb5W7nUIK16lFgjwDq6ePuxdqSafgOw5jVQroNsDCPRz8B%2F4fg7kv6gs4R9SX7gCaQ2V7L6NxqJDUUqsCMtIYq05Qx43dGByqLoVEz9USpRBmTLQwpGvOmUaGNNwTsCwmt5gRP8UX3CnkwI%2FydxmhrXLEdaUIFVwJbIor9&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20240604T221639Z&X-Amz-SignedHeaders=host&X-Amz-Expires=300&X-Amz-Credential=ASIAQ3PHCVTY4E2DAHPF%2F20240604%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Signature=eece32da8855b55208baecc0ce041e79aa03be1c292b58c67ce0215de36cbdb4&hash=46dd1da122f4cea242c6444a811fb16dde5cb8465e88552ac3eaeee97b975e9b&host=68042c943591013ac2b2430a89b270f6af2c76d8dfd086a07176afe7c76c2c61&pii=S2214509524005011&tid=spdf-45c1c4d1-dd97-4c0d-a04f-c30843a79e78&sid=1fea53ed2d5cf1443e4a7c4-33f4bf6475e1gxrqa&type=client&tsoh=d3d3LnNjaWVuY2VkaXJlY3QuY29t&ua=0f155c5f060d565b01055d&rr=88eb49dd2a5f7688&cc=us)
663+
- [A hybrid machine learning approach for predicting fiber-reinforced polymer-concrete interface bond strength](https://download.ssrn.com/eaai/e646e179-ec4a-4987-80b5-8d6bbf43ceda-meca.pdf?response-content-disposition=inline&X-Amz-Security-Token=IQoJb3JpZ2luX2VjEBMaCXVzLWVhc3QtMSJHMEUCIFVH%2Ba5TT2NOEqgCl7GMhXBXBZWE9VzzcRFT6kYXzdxYAiEA4yvXsrzNQnNq%2BkJRB0rw1d2p35f418pIO%2FT3PHKoZ%2BoqxgUI%2B%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FARAEGgwzMDg0NzUzMDEyNTciDCD0kCrKAqamcwb9LCqaBb4zlqjDhNBhf%2Frbe%2FX3lzSjvS58HiJQtbOHmzaM7putg93e7Wk8nPesoiupTH8uB5ejDC7stGJElRZp5ulT5M6CokoMu82ERn15kMpkgptj3MVEmsY9VTCP%2BCbROJ6v4YcAttOOAEzOc2M6li6o0w4IsF8DNXEIJr%2FJvjB3IDYPkrmpIiHl25h3AzfxPuOF01E2rgucLnY0xTyKGnPBBDZ%2FPtcuqlk2NKun3Q9HbcKj8EPJP%2FPupMW3IQvMnhcdJqqLHXs6wL1P42NTw5vtZO2W5WiEC1CNGDFUTSFRdb9hjhpH4JsYl8X%2BSFT6mZ31K2HTWeuigs5nXp1JN8r8r4O021yiVxHAJ6Chnddr0Z19iM5yOZA4H1EhO1rxxL0VF%2F%2F8Ac3GxuEfkBiug5wuL7aNlBNX6720pYfHH%2FgyrqdU5KSDIp8VYw3KgEij0LkizBHQIoolC48VAEMNc%2F8iWOdZpAVYprhEbABbff8%2BW6c4y1N9vmLTkjZkJtZODpzpQVjrHkL9hAOvmXZocEEN6maRoVJx3DlcTHrfQr8%2BQnPQnmajb5x0FHo44xxBIUt7UB4FOc6beDprle%2F7BO2SNEPLw6rJ9e3WJeVaYch46iqk2tiWFroNHDXlQ73CbzV59AEVtLAR29eIf7uyz%2BU0fOAXG5oAsJyB7YXUjH%2Bh79sxJgBq3%2FoqkEja06CFPRhWeqxixc8y9bEU%2FvvjhfbcWcxGY%2Be%2FwnXbemUbSyr26Y5xvADyicKIMexZNjeHBJ9MKMifQ9oh%2FjmudjxtMLbTpA6EAxMelLjhWcoURF0XeTttMEzEuTjO1OXUwMeXSPZ9roJqH3DB4PHi%2B8UIUG1JoVocv7wDu5ZVlMzgmDr0ti1BShKr9szxagq34jCEkJe8BjqxAbm7bsef33J3AImECx0GZeL0R2tFJZ7ctogL261zP7RqJ4T71rDMbpyfX6HfGuNEbWVROKHUexpuH8FZBodmn%2FjDjZSviK1oxQ1L5TDA2rwMsodnThreIad8vSXqxAzx9qng%2BeN2llXkNdIB7WEnkttzcJ24pZqwYnPI%2FsOznTq%2BDJ88mdNPtzph%2FGdVQcR99tV3waapotTEnUjjoqTTSh9aMgi1jIYMGMrJj6Jb4N%2FhWA%3D%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20250114T024208Z&X-Amz-SignedHeaders=host&X-Amz-Expires=300&X-Amz-Credential=ASIAUPUUPRWEXKDDLJZE%2F20250114%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Signature=11c6f325f84736d5324ab155663c94231696de52be8910a03bb5e9c18f0d1689&abstractId=5055231)
662664
- [Using explainable machine learning and fitbit data to investigate predictors of adolescent obesity](https://www.nature.com/articles/s41598-024-60811-2)
663665
- [Interpretable Predictive Value of Including HDL-2b and HDL-3 in an Explainable Boosting Machine Model for Multiclass Classification of Coronary Artery Stenosis Severity in Acute Myocardial Infarction Patients](https://watermark.silverchair.com/ztae100.pdf?token=AQECAHi208BE49Ooan9kkhW_Ercy7Dm3ZL_9Cf3qfKAc485ysgAAA2owggNmBgkqhkiG9w0BBwagggNXMIIDUwIBADCCA0wGCSqGSIb3DQEHATAeBglghkgBZQMEAS4wEQQMnDqoUBnqG9Zyr0dAAgEQgIIDHT2M3owEzTRAV3KZzrOpzyqOYgClio-CQrzB5731fvsEe9ZWO_QfqQAKdaPyyOsEKjacd25hWs-_OvgXCqc36R4yFWu46PFOCApII2s3hbHYI1XEQozWfdyosgaQf_e7_5RIqIfwTEHt19LoYZuaDYjCqq2vmWOMZb6dNI6mz-h3Zd6BgbyYAFgRHiJfU94NU0Crf_AbbTx2jW3HqMBLYPn-ysUiyQYILNmqlKAAlw81ZjBwzusaQFsiJMCxwGyFHks7nwtnUQ8J5PU5Jelp8_fQ8x5_dlZvzvdkI9MR87zUkk4hm2XL0uyfvH92-7VV_2gMe-rU3aJZhbHJu2hENPDh_OmoDe7SOC-5EwPsgIDoDr_dgSgyhBMIbOk_TrSM4oEN6dbtvfLSDXQUWDV4semLuPjqz7WyiQz4PPt1mXuaf12X5xyVsf1Mms4UpGAKLyoCdJ-zDJ9csOPCefIsV2Bzs-KzaD63HWFLJuCU0hWIaK0QOcJATnpQb1PhFiAF6YZ_cCYTxkuAcrQyHS-WCEefNy8hB8PQXhNljtw0J499qdnLcNOM1gAQ3-o21KaTrEFs-DyvZwWmaGn8Zw1bK1CG8yVxWOh6_wjJpGjMMenstzrKFcLbJADs1yf3PuNGZds0g-Qf4NDcgsturcr0V1nLHVRFazWZhUKSeRnLjPzA5i3lVKnmwKjKa_50i0LMSIXNFS-dmvHs-qVUb8FO0_aKZ6egckXkoGG8w3Jox4MhhY2-B28Z0wbJOj8_DojCCtAmAPC0T5emRsuk1rkuRXIoMtFDWN0l7fr7RVkuy1TEd3mpa5UuU7Qo-wu_yqi6ibwLupjGeVN__7SeteoBSh8yFJgYN4BEiYmdkEX7DgKaMC90h5GakNJ7zeAPR9PFnQVRORoof04qMWK4aGod2igso1-qsCup-kVWmPy8zrQKlqxE4OCeqUpKQgZMUUAlFu643iuRnQuLnahXhui45TY8lS56XGCLqkwSG594lMoAXAYZ9tVFM4fAVwQJ3EWkJfHRRCWWGZfLwBPsdUnNEziGg4QIdrKhe-Fu7nLF)
664666
- [Estimate Deformation Capacity of Non-Ductile RC Shear Walls Using Explainable Boosting Machine](https://arxiv.org/pdf/2301.04652.pdf)
@@ -682,6 +684,8 @@ We also build on top of many great packages. Please check them out!
682684
- [Binary ECG Classification Using Explainable Boosting Machines for IoT Edge Devices](https://ieeexplore.ieee.org/document/9970834)
683685
- [Explainable artificial intelligence toward usable and trustworthy computer-aided diagnosis of multiple sclerosis from Optical Coherence Tomography](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10406231/)
684686
- [An Interpretable Machine Learning Model with Deep Learning-based Imaging Biomarkers for Diagnosis of Alzheimer’s Disease](https://arxiv.org/pdf/2308.07778.pdf)
687+
- [Prediction of Alzheimer Disease on the DARWIN Dataset with Dimensionality Reduction and Explainability Techniques](https://www.scitepress.org/Papers/2024/130174/130174.pdf)
688+
- [Explainable Boosting Machine for Predicting Alzheimer’s Disease from MRI Hippocampal Subfields](https://link.springer.com/chapter/10.1007/978-3-030-86993-9_31)
685689
- [Comparing explainable machine learning approaches with traditional statistical methods for evaluating stroke risk models: retrospective cohort study](https://pureadmin.qub.ac.uk/ws/portalfiles/portal/495863198/JMIR_Cardio.pdf)
686690
- [Explainable Artificial Intelligence for Cotton Yield Prediction With Multisource Data](https://ieeexplore.ieee.org/document/10214067)
687691
- [Preoperative detection of extraprostatic tumor extension in patients with primary prostate cancer utilizing](https://insightsimaging.springeropen.com/articles/10.1186/s13244-024-01876-5)
@@ -737,7 +741,6 @@ We also build on top of many great packages. Please check them out!
737741
- [Death by Round Numbers and Sharp Thresholds: How to Avoid Dangerous AI EHR Recommendations](https://www.medrxiv.org/content/10.1101/2022.04.30.22274520v1.full.pdf)
738742
- [Building a predictive model to identify clinical indicators for COVID-19 using machine learning method](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9037972/pdf/11517_2022_Article_2568.pdf)
739743
- [Using Innovative Machine Learning Methods to Screen and Identify Predictors of Congenital Heart Diseases](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8777022/pdf/fcvm-08-797002.pdf)
740-
- [Explainable Boosting Machine for Predicting Alzheimer’s Disease from MRI Hippocampal Subfields](https://link.springer.com/chapter/10.1007/978-3-030-86993-9_31)
741744
- [Impact of Accuracy on Model Interpretations](https://arxiv.org/pdf/2011.09903.pdf)
742745
- [Machine Learning Algorithms for Identifying Dependencies in OT Protocols](https://www.mdpi.com/1996-1073/16/10/4056)
743746
- [Causal Understanding of Why Users Share Hate Speech on Social Media](https://arxiv.org/pdf/2310.15772.pdf)

python/interpret-core/interpret/glassbox/_ebm/_ebm.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,8 +1011,11 @@ def fit(self, X, y, sample_weight=None, bags=None, init_score=None):
10111011
* bag[include_samples]
10121012
)
10131013

1014-
bagged_intercept[idx, :] = np.average(
1015-
y_local, weights=sample_weight_local
1014+
bagged_intercept[idx, :] = native.flat_mean(
1015+
y_local,
1016+
None
1017+
if sample_weight_local is None
1018+
else np.asarray(sample_weight_local, np.float64),
10161019
)
10171020
elif init_score is None:
10181021
if (
@@ -1472,7 +1475,7 @@ def fit(self, X, y, sample_weight=None, bags=None, init_score=None):
14721475
if objective_code == Native.Objective_MonoClassification:
14731476
pass
14741477
elif objective_code == Native.Objective_Rmse:
1475-
correction = np.average(y - scores, weights=sample_weight)
1478+
correction = native.flat_mean(y - scores, sample_weight)
14761479
intercept += correction
14771480
bagged_intercept += correction
14781481
else:

python/interpret-core/interpret/glassbox/_ebm/_utils.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ def _create_proportional_tensor(axis_weights):
148148

149149

150150
def process_bag_terms(intercept, term_scores, bin_weights):
151+
native = Native.get_native_singleton()
151152
for scores, weights in zip(term_scores, bin_weights):
152153
if develop.get_option("purify_result"):
153154
new_scores, add_impurities, add_intercept = purify(scores, weights)
@@ -165,7 +166,7 @@ def process_bag_terms(intercept, term_scores, bin_weights):
165166
temp_weights[ignored] = 0.0
166167

167168
if temp_weights.sum() != 0:
168-
mean = np.average(temp_scores, 0, temp_weights)
169+
mean = native.flat_mean(temp_scores, temp_weights)
169170
intercept += mean
170171
scores -= mean
171172
else:
@@ -178,7 +179,7 @@ def process_bag_terms(intercept, term_scores, bin_weights):
178179
temp_weights[ignored] = 0.0
179180

180181
if temp_weights.sum() != 0:
181-
mean = np.average(temp_scores, 0, temp_weights)
182+
mean = native.flat_mean(temp_scores, temp_weights)
182183
intercept[i] += mean
183184
scores[..., i] -= mean
184185

python/interpret-core/interpret/utils/_native.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import platform
99
import struct
1010
import sys
11+
import math
1112
from contextlib import AbstractContextManager
1213

1314
import numpy as np
@@ -236,6 +237,28 @@ def clean_float(self, val):
236237
)
237238
return val_array[0]
238239

240+
def flat_mean(self, vals, weights=None):
241+
if weights is not None:
242+
if vals.shape != weights.shape:
243+
msg = "vals and weights must have the same shape to call flat_mean."
244+
raise Exception(msg)
245+
246+
n_tensor_bins = math.prod(vals.shape)
247+
248+
mean_result = ct.c_double(np.nan)
249+
250+
return_code = self._unsafe.SafeMean(
251+
n_tensor_bins,
252+
1,
253+
Native._make_pointer(vals, np.float64, None),
254+
Native._make_pointer(weights, np.float64, None, True),
255+
ct.byref(mean_result),
256+
)
257+
if return_code: # pragma: no cover
258+
raise Native._get_native_exception(return_code, "SafeMean")
259+
260+
return mean_result
261+
239262
def safe_mean(self, tensor, weights=None):
240263
n_bags = tensor.shape[0]
241264
if weights is not None:

python/interpret-core/tests/glassbox/ebm/test_ebm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1254,7 +1254,7 @@ def test_identical_classification():
12541254
original = get_option("acceleration")
12551255
set_option("acceleration", 0)
12561256

1257-
for iteration in range(3):
1257+
for iteration in range(1):
12581258
total = 0.0
12591259
seed = 0
12601260
for i in range(10):

0 commit comments

Comments
 (0)