add flat_mean utility function

paulbkoch · paulbkoch · commit ad0ebc1c7fe3 · 2025-01-14T20:53:58.000-08:00
diff --git a/README.md b/README.md
@@ -650,6 +650,7 @@ We also build on top of many great packages. Please check them out!
 - [Revealing the Galaxy-Halo Connection Through Machine Learning](https://arxiv.org/pdf/2204.10332.pdf)
 - [How the Galaxy–Halo Connection Depends on Large-Scale Environment](https://arxiv.org/pdf/2402.07995.pdf)
 - [Explainable Artificial Intelligence for COVID-19 Diagnosis Through Blood Test Variables](https://link.springer.com/content/pdf/10.1007/s40313-021-00858-y.pdf)
+- [A diagnostic support system based on interpretable machine learning and oscillometry for accurate diagnosis of respiratory dysfunction in silicosis](https://www.biorxiv.org/content/10.1101/2025.01.08.632001v1.full.pdf)
 - [Using Explainable Boosting Machines (EBMs) to Detect Common Flaws in Data](https://link.springer.com/chapter/10.1007/978-3-030-93736-2_40)
 - [Differentially Private Gradient Boosting on Linear Learners for Tabular Data Analysis](https://assets.amazon.science/fa/3a/a62ba73f4bbda1d880b678c39193/differentially-private-gradient-boosting-on-linear-learners-for-tabular-data-analysis.pdf)
 - [Differentially private and explainable boosting machine with enhanced utility](https://www.sciencedirect.com/science/article/abs/pii/S0925231224011950)
@@ -659,6 +660,7 @@ We also build on top of many great packages. Please check them out!
 - [Towards Cleaner Cities: Estimating Vehicle-Induced PM2.5 with Hybrid EBM-CMA-ES Modeling](https://www.mdpi.com/2305-6304/12/11/827)
 - [Using machine learning to assist decision making in the assessment of mental health patients presenting to emergency departments](https://journals.sagepub.com/doi/full/10.1177/20552076241287364)
 - [Proposing an inherently interpretable machine learning model for shear strength prediction of reinforced concrete beams with stirrups](https://pdf.sciencedirectassets.com/287527/1-s2.0-S2214509523X00035/1-s2.0-S2214509524005011/main.pdf?X-Amz-Security-Token=IQoJb3JpZ2luX2VjECUaCXVzLWVhc3QtMSJGMEQCIB0r0KsYBZufOjbCVtUtozwn1QKMdLt2tbbfhuJKjWlXAiB5Dfr7p0yyj%2FSfypTLmjPL8WbjGAB3tRACFjyyqQbbfiq8BQiu%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F8BEAUaDDA1OTAwMzU0Njg2NSIMqBpZ2HmN91c%2BJPqpKpAFZtvqQjCScZa4FN%2FeubsPzOk5c%2B58LliO4Zr%2Bn1pm3vtW4I9I1vA29pkhT5was1N3ccPPIm2jNLwJ%2FHiZej7A2SmFv13Ro3sTvhqG%2F6A9Xx70Nx9jOlDPJUmCypKadKp0FGfuhZQuxeN0b%2F1QUUQZG4RpxC%2FXorRRHmb%2FrXcOWBwu4PmLZAkWmTKpncjDI7oj8eh8yBe6%2FA3JkJ14ZyBgR7JnPzR2ZqMdIhvlKoyMn6EnL1Azq2y3qwEMdzSCvz3wH3sT4pClc2vPs6ruQS4CdT3E7BHrf42Q0VnUXWjuy7gt9iRr0vaWR3tD%2FxyrrEKw7XuMHO9L4rQ4Pfn1dhGZ2J8H5ocwJGSh13U5fY6noyaTNViqvHx1oHNMWL03QpkJxmUxYquBWepcDjxEc32V6eGF7Ecm8Vij3s20wdRNcHqxGFKlUCgph48CKUA79iwSGQCkWQh7bq%2FTtowTbSPud7l8xeG1MvfIVy%2B6yzrjqygvPBQs3qkvdoWUrKXe57bhr2jEkKlSdYyp2TJMD6yoYRdTPyFx5xb0KgIt6KQTPmfbqYXkd3FFz3uc0HmWC5NQz6qP9UzNcBhcK8dXo3Dw042pl0HLO1njFaa%2BBfbT89VUVUIqjrAcmHweIl1v7Eyldzr%2BGBXIlsxPO3gPzyPLF2LTggc6dA%2Bswxmgmkv%2B7n5pU5%2F5sxvEhemb%2Fqu%2B8d47O%2Bn6RH8fL4eLGGL2d0dvFvyE7gEwt%2BaU9HsIN0IHqyH5VmaTF5zaKy%2Fn%2BhkF8yGpe5Hq5yNOUGrfQgfyFn4Kqd%2FTVajxIFzk8DEY%2F%2FFtyGJ%2B8BrHV4P%2FYs8R4XcBzPQtyrTuUC1CGmF01Tc2gnnEo4pVPaIjfBk9B%2BXVMc3Mu4Ywy4L%2BsgY6sgFK3hFIXjIfoVjqrIlBvsGYaFiZB1bVKBVy3DRiBgozzYmIVhipN%2FS%2BPok1oETqvYVvLqEVkGcb5W7nUIK16lFgjwDq6ePuxdqSafgOw5jVQroNsDCPRz8B%2F4fg7kv6gs4R9SX7gCaQ2V7L6NxqJDUUqsCMtIYq05Qx43dGByqLoVEz9USpRBmTLQwpGvOmUaGNNwTsCwmt5gRP8UX3CnkwI%2FydxmhrXLEdaUIFVwJbIor9&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20240604T221639Z&X-Amz-SignedHeaders=host&X-Amz-Expires=300&X-Amz-Credential=ASIAQ3PHCVTY4E2DAHPF%2F20240604%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Signature=eece32da8855b55208baecc0ce041e79aa03be1c292b58c67ce0215de36cbdb4&hash=46dd1da122f4cea242c6444a811fb16dde5cb8465e88552ac3eaeee97b975e9b&host=68042c943591013ac2b2430a89b270f6af2c76d8dfd086a07176afe7c76c2c61&pii=S2214509524005011&tid=spdf-45c1c4d1-dd97-4c0d-a04f-c30843a79e78&sid=1fea53ed2d5cf1443e4a7c4-33f4bf6475e1gxrqa&type=client&tsoh=d3d3LnNjaWVuY2VkaXJlY3QuY29t&ua=0f155c5f060d565b01055d&rr=88eb49dd2a5f7688&cc=us)
+- [A hybrid machine learning approach for predicting fiber-reinforced polymer-concrete interface bond strength](https://download.ssrn.com/eaai/e646e179-ec4a-4987-80b5-8d6bbf43ceda-meca.pdf?response-content-disposition=inline&X-Amz-Security-Token=IQoJb3JpZ2luX2VjEBMaCXVzLWVhc3QtMSJHMEUCIFVH%2Ba5TT2NOEqgCl7GMhXBXBZWE9VzzcRFT6kYXzdxYAiEA4yvXsrzNQnNq%2BkJRB0rw1d2p35f418pIO%2FT3PHKoZ%2BoqxgUI%2B%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FARAEGgwzMDg0NzUzMDEyNTciDCD0kCrKAqamcwb9LCqaBb4zlqjDhNBhf%2Frbe%2FX3lzSjvS58HiJQtbOHmzaM7putg93e7Wk8nPesoiupTH8uB5ejDC7stGJElRZp5ulT5M6CokoMu82ERn15kMpkgptj3MVEmsY9VTCP%2BCbROJ6v4YcAttOOAEzOc2M6li6o0w4IsF8DNXEIJr%2FJvjB3IDYPkrmpIiHl25h3AzfxPuOF01E2rgucLnY0xTyKGnPBBDZ%2FPtcuqlk2NKun3Q9HbcKj8EPJP%2FPupMW3IQvMnhcdJqqLHXs6wL1P42NTw5vtZO2W5WiEC1CNGDFUTSFRdb9hjhpH4JsYl8X%2BSFT6mZ31K2HTWeuigs5nXp1JN8r8r4O021yiVxHAJ6Chnddr0Z19iM5yOZA4H1EhO1rxxL0VF%2F%2F8Ac3GxuEfkBiug5wuL7aNlBNX6720pYfHH%2FgyrqdU5KSDIp8VYw3KgEij0LkizBHQIoolC48VAEMNc%2F8iWOdZpAVYprhEbABbff8%2BW6c4y1N9vmLTkjZkJtZODpzpQVjrHkL9hAOvmXZocEEN6maRoVJx3DlcTHrfQr8%2BQnPQnmajb5x0FHo44xxBIUt7UB4FOc6beDprle%2F7BO2SNEPLw6rJ9e3WJeVaYch46iqk2tiWFroNHDXlQ73CbzV59AEVtLAR29eIf7uyz%2BU0fOAXG5oAsJyB7YXUjH%2Bh79sxJgBq3%2FoqkEja06CFPRhWeqxixc8y9bEU%2FvvjhfbcWcxGY%2Be%2FwnXbemUbSyr26Y5xvADyicKIMexZNjeHBJ9MKMifQ9oh%2FjmudjxtMLbTpA6EAxMelLjhWcoURF0XeTttMEzEuTjO1OXUwMeXSPZ9roJqH3DB4PHi%2B8UIUG1JoVocv7wDu5ZVlMzgmDr0ti1BShKr9szxagq34jCEkJe8BjqxAbm7bsef33J3AImECx0GZeL0R2tFJZ7ctogL261zP7RqJ4T71rDMbpyfX6HfGuNEbWVROKHUexpuH8FZBodmn%2FjDjZSviK1oxQ1L5TDA2rwMsodnThreIad8vSXqxAzx9qng%2BeN2llXkNdIB7WEnkttzcJ24pZqwYnPI%2FsOznTq%2BDJ88mdNPtzph%2FGdVQcR99tV3waapotTEnUjjoqTTSh9aMgi1jIYMGMrJj6Jb4N%2FhWA%3D%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20250114T024208Z&X-Amz-SignedHeaders=host&X-Amz-Expires=300&X-Amz-Credential=ASIAUPUUPRWEXKDDLJZE%2F20250114%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Signature=11c6f325f84736d5324ab155663c94231696de52be8910a03bb5e9c18f0d1689&abstractId=5055231)
 - [Using explainable machine learning and fitbit data to investigate predictors of adolescent obesity](https://www.nature.com/articles/s41598-024-60811-2)
 - [Interpretable Predictive Value of Including HDL-2b and HDL-3 in an Explainable Boosting Machine Model for Multiclass Classification of Coronary Artery Stenosis Severity in Acute Myocardial Infarction Patients](https://watermark.silverchair.com/ztae100.pdf?token=AQECAHi208BE49Ooan9kkhW_Ercy7Dm3ZL_9Cf3qfKAc485ysgAAA2owggNmBgkqhkiG9w0BBwagggNXMIIDUwIBADCCA0wGCSqGSIb3DQEHATAeBglghkgBZQMEAS4wEQQMnDqoUBnqG9Zyr0dAAgEQgIIDHT2M3owEzTRAV3KZzrOpzyqOYgClio-CQrzB5731fvsEe9ZWO_QfqQAKdaPyyOsEKjacd25hWs-_OvgXCqc36R4yFWu46PFOCApII2s3hbHYI1XEQozWfdyosgaQf_e7_5RIqIfwTEHt19LoYZuaDYjCqq2vmWOMZb6dNI6mz-h3Zd6BgbyYAFgRHiJfU94NU0Crf_AbbTx2jW3HqMBLYPn-ysUiyQYILNmqlKAAlw81ZjBwzusaQFsiJMCxwGyFHks7nwtnUQ8J5PU5Jelp8_fQ8x5_dlZvzvdkI9MR87zUkk4hm2XL0uyfvH92-7VV_2gMe-rU3aJZhbHJu2hENPDh_OmoDe7SOC-5EwPsgIDoDr_dgSgyhBMIbOk_TrSM4oEN6dbtvfLSDXQUWDV4semLuPjqz7WyiQz4PPt1mXuaf12X5xyVsf1Mms4UpGAKLyoCdJ-zDJ9csOPCefIsV2Bzs-KzaD63HWFLJuCU0hWIaK0QOcJATnpQb1PhFiAF6YZ_cCYTxkuAcrQyHS-WCEefNy8hB8PQXhNljtw0J499qdnLcNOM1gAQ3-o21KaTrEFs-DyvZwWmaGn8Zw1bK1CG8yVxWOh6_wjJpGjMMenstzrKFcLbJADs1yf3PuNGZds0g-Qf4NDcgsturcr0V1nLHVRFazWZhUKSeRnLjPzA5i3lVKnmwKjKa_50i0LMSIXNFS-dmvHs-qVUb8FO0_aKZ6egckXkoGG8w3Jox4MhhY2-B28Z0wbJOj8_DojCCtAmAPC0T5emRsuk1rkuRXIoMtFDWN0l7fr7RVkuy1TEd3mpa5UuU7Qo-wu_yqi6ibwLupjGeVN__7SeteoBSh8yFJgYN4BEiYmdkEX7DgKaMC90h5GakNJ7zeAPR9PFnQVRORoof04qMWK4aGod2igso1-qsCup-kVWmPy8zrQKlqxE4OCeqUpKQgZMUUAlFu643iuRnQuLnahXhui45TY8lS56XGCLqkwSG594lMoAXAYZ9tVFM4fAVwQJ3EWkJfHRRCWWGZfLwBPsdUnNEziGg4QIdrKhe-Fu7nLF)
 - [Estimate Deformation Capacity of Non-Ductile RC Shear Walls Using Explainable Boosting Machine](https://arxiv.org/pdf/2301.04652.pdf)
@@ -682,6 +684,8 @@ We also build on top of many great packages. Please check them out!
 - [Binary ECG Classification Using Explainable Boosting Machines for IoT Edge Devices](https://ieeexplore.ieee.org/document/9970834)
 - [Explainable artificial intelligence toward usable and trustworthy computer-aided diagnosis of multiple sclerosis from Optical Coherence Tomography](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10406231/)
 - [An Interpretable Machine Learning Model with Deep Learning-based Imaging Biomarkers for Diagnosis of Alzheimer’s Disease](https://arxiv.org/pdf/2308.07778.pdf)
+- [Prediction of Alzheimer Disease on the DARWIN Dataset with Dimensionality Reduction and Explainability Techniques](https://www.scitepress.org/Papers/2024/130174/130174.pdf)
+- [Explainable Boosting Machine for Predicting Alzheimer’s Disease from MRI Hippocampal Subfields](https://link.springer.com/chapter/10.1007/978-3-030-86993-9_31)
 - [Comparing explainable machine learning approaches with traditional statistical methods for evaluating stroke risk models: retrospective cohort study](https://pureadmin.qub.ac.uk/ws/portalfiles/portal/495863198/JMIR_Cardio.pdf)
 - [Explainable Artificial Intelligence for Cotton Yield Prediction With Multisource Data](https://ieeexplore.ieee.org/document/10214067)
 - [Preoperative detection of extraprostatic tumor extension in patients with primary prostate cancer utilizing](https://insightsimaging.springeropen.com/articles/10.1186/s13244-024-01876-5)
@@ -737,7 +741,6 @@ We also build on top of many great packages. Please check them out!
 - [Death by Round Numbers and Sharp Thresholds: How to Avoid Dangerous AI EHR Recommendations](https://www.medrxiv.org/content/10.1101/2022.04.30.22274520v1.full.pdf)
 - [Building a predictive model to identify clinical indicators for COVID-19 using machine learning method](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9037972/pdf/11517_2022_Article_2568.pdf)
 - [Using Innovative Machine Learning Methods to Screen and Identify Predictors of Congenital Heart Diseases](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8777022/pdf/fcvm-08-797002.pdf)
-- [Explainable Boosting Machine for Predicting Alzheimer’s Disease from MRI Hippocampal Subfields](https://link.springer.com/chapter/10.1007/978-3-030-86993-9_31)
 - [Impact of Accuracy on Model Interpretations](https://arxiv.org/pdf/2011.09903.pdf)
 - [Machine Learning Algorithms for Identifying Dependencies in OT Protocols](https://www.mdpi.com/1996-1073/16/10/4056)
 - [Causal Understanding of Why Users Share Hate Speech on Social Media](https://arxiv.org/pdf/2310.15772.pdf)
diff --git a/python/interpret-core/interpret/glassbox/_ebm/_ebm.py b/python/interpret-core/interpret/glassbox/_ebm/_ebm.py
@@ -1011,8 +1011,11 @@ def fit(self, X, y, sample_weight=None, bags=None, init_score=None):
                                 * bag[include_samples]
                             )
 
-                    bagged_intercept[idx, :] = np.average(
-                        y_local, weights=sample_weight_local
+                    bagged_intercept[idx, :] = native.flat_mean(
+                        y_local,
+                        None
+                        if sample_weight_local is None
+                        else np.asarray(sample_weight_local, np.float64),
                     )
             elif init_score is None:
                 if (
@@ -1472,7 +1475,7 @@ def fit(self, X, y, sample_weight=None, bags=None, init_score=None):
             if objective_code == Native.Objective_MonoClassification:
                 pass
             elif objective_code == Native.Objective_Rmse:
-                correction = np.average(y - scores, weights=sample_weight)
+                correction = native.flat_mean(y - scores, sample_weight)
                 intercept += correction
                 bagged_intercept += correction
             else:
diff --git a/python/interpret-core/interpret/glassbox/_ebm/_utils.py b/python/interpret-core/interpret/glassbox/_ebm/_utils.py
@@ -148,6 +148,7 @@ def _create_proportional_tensor(axis_weights):
 
 
 def process_bag_terms(intercept, term_scores, bin_weights):
+    native = Native.get_native_singleton()
     for scores, weights in zip(term_scores, bin_weights):
         if develop.get_option("purify_result"):
             new_scores, add_impurities, add_intercept = purify(scores, weights)
@@ -165,7 +166,7 @@ def process_bag_terms(intercept, term_scores, bin_weights):
             temp_weights[ignored] = 0.0
 
             if temp_weights.sum() != 0:
-                mean = np.average(temp_scores, 0, temp_weights)
+                mean = native.flat_mean(temp_scores, temp_weights)
                 intercept += mean
                 scores -= mean
         else:
@@ -178,7 +179,7 @@ def process_bag_terms(intercept, term_scores, bin_weights):
                 temp_weights[ignored] = 0.0
 
                 if temp_weights.sum() != 0:
-                    mean = np.average(temp_scores, 0, temp_weights)
+                    mean = native.flat_mean(temp_scores, temp_weights)
                     intercept[i] += mean
                     scores[..., i] -= mean
 
diff --git a/python/interpret-core/interpret/utils/_native.py b/python/interpret-core/interpret/utils/_native.py
@@ -8,6 +8,7 @@
 import platform
 import struct
 import sys
+import math
 from contextlib import AbstractContextManager
 
 import numpy as np
@@ -236,6 +237,28 @@ def clean_float(self, val):
         )
         return val_array[0]
 
+    def flat_mean(self, vals, weights=None):
+        if weights is not None:
+            if vals.shape != weights.shape:
+                msg = "vals and weights must have the same shape to call flat_mean."
+                raise Exception(msg)
+
+        n_tensor_bins = math.prod(vals.shape)
+
+        mean_result = ct.c_double(np.nan)
+
+        return_code = self._unsafe.SafeMean(
+            n_tensor_bins,
+            1,
+            Native._make_pointer(vals, np.float64, None),
+            Native._make_pointer(weights, np.float64, None, True),
+            ct.byref(mean_result),
+        )
+        if return_code:  # pragma: no cover
+            raise Native._get_native_exception(return_code, "SafeMean")
+
+        return mean_result
+
     def safe_mean(self, tensor, weights=None):
         n_bags = tensor.shape[0]
         if weights is not None:
diff --git a/python/interpret-core/tests/glassbox/ebm/test_ebm.py b/python/interpret-core/tests/glassbox/ebm/test_ebm.py
@@ -1254,7 +1254,7 @@ def test_identical_classification():
     original = get_option("acceleration")
     set_option("acceleration", 0)
 
-    for iteration in range(3):
+    for iteration in range(1):
         total = 0.0
         seed = 0
         for i in range(10):