Skip to content

Add code to Bundleio to generate error stats #12051

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 111 additions & 0 deletions devtools/bundled_program/bundled_program.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
* Copyright 2025 Arm Limited and/or its affiliates.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
Expand Down Expand Up @@ -346,6 +347,116 @@ ET_NODISCARD Error load_bundled_input(
return Error::Ok;
}

/**
 * Compute aggregate error statistics between the method's current outputs and
 * the expected outputs bundled for test case `testset_idx`.
 *
 * For every element, with a = actual value and e = expected value:
 *   abs_err      = |a - e|
 *   relative_err = |a - e| / max(eps, max(|a|, |e|))
 * The means are taken over all elements of all output tensors.
 *
 * Only float32 tensor outputs are supported.
 *
 * @param[in] method The Method whose outputs are compared.
 * @param[in] bundled_program_ptr Serialized bundled program holding the
 *     expected outputs.
 * @param[in] testset_idx Index of the test case to compare against.
 *
 * @returns ErrorStats whose `status` is
 *   - Error::Ok when the statistics are filled in (all zero if the outputs
 *     contain no elements);
 *   - Error::InvalidArgument when the buffer is not a bundled program,
 *     `testset_idx` is out of range, the test case has no expected outputs or
 *     fewer than the method produces, a method output is not a tensor, or
 *     element counts differ;
 *   - Error::NotSupported when an expected value is not a tensor or a tensor
 *     is not float32;
 *   - the error reported by get_method_test_suite() otherwise.
 *   On any non-Ok status every statistic is 0.
 */
ET_NODISCARD ErrorStats compute_method_output_error_stats(
    Method& method,
    SerializedBundledProgram* bundled_program_ptr,
    size_t testset_idx) {
  // Every failure path reports zeroed statistics alongside the error code.
  const auto fail = [](Error error) -> ErrorStats {
    return {error, 0.0, 0.0, 0.0, 0.0};
  };

  if (!bundled_program_flatbuffer::BundledProgramBufferHasIdentifier(
          bundled_program_ptr)) {
    // The input buffer should be a bundled program.
    return fail(Error::InvalidArgument);
  }

  auto method_test = get_method_test_suite(
      bundled_program_flatbuffer::GetBundledProgram(bundled_program_ptr),
      method);
  if (!method_test.ok()) {
    return fail(method_test.error());
  }

  auto test_cases = method_test.get()->test_cases();
  if (test_cases == nullptr || testset_idx >= test_cases->size()) {
    return fail(Error::InvalidArgument);
  }

  auto bundled_expected_outputs =
      test_cases->Get(static_cast<flatbuffers::uoffset_t>(testset_idx))
          ->expected_outputs();
  if (bundled_expected_outputs == nullptr ||
      bundled_expected_outputs->size() == 0) {
    ET_LOG(
        Error,
        "No bundled expected outputs, so we can't verify the method outputs.");
    return fail(Error::InvalidArgument);
  }

  // The loop below indexes the expected outputs by method output index, so
  // the bundle must provide at least as many entries as the method produces.
  const size_t num_outputs = method.outputs_size();
  if (num_outputs > bundled_expected_outputs->size()) {
    ET_LOG(
        Error,
        "Method has more outputs than the bundled test case provides.");
    return fail(Error::InvalidArgument);
  }

  // abs_err = (a - b).abs()
  // relative_err = (a - b).abs() / torch.maximum(torch.tensor(1e-8),
  //                torch.maximum(a.abs(), b.abs()))
  // NOTE(review): the PR author flagged these metrics as open to refinement.
  double sum_abs = 0.0;
  double max_abs = 0.0;
  double sum_rel = 0.0;
  double max_rel = 0.0;
  // Keep the divider at or above eps so the relative error behaves better
  // around 0 values.
  const double eps = 1e-8f;

  int64_t total_elems = 0;

  for (size_t output_idx = 0; output_idx < num_outputs; output_idx++) {
    auto bundled_expected_output = bundled_expected_outputs->GetMutableObject(
        static_cast<flatbuffers::uoffset_t>(output_idx));
    auto method_output = method.get_output(output_idx);

    switch (bundled_expected_output->val_type()) {
      case bundled_program_flatbuffer::ValueUnion::Tensor: {
        // toTensor() must only be called on an EValue that holds a tensor.
        if (!method_output.isTensor()) {
          ET_LOG(Error, "Method output is not a tensor");
          return fail(Error::InvalidArgument);
        }

        auto bundled_expected_output_tensor =
            static_cast<bundled_program_flatbuffer::Tensor*>(
                bundled_expected_output->mutable_val());
        const auto method_output_tensor = method_output.toTensor();

#ifdef USE_ATEN_LIB
        Tensor expected = tensor_like(bundled_expected_output_tensor);
#else // !USE_ATEN_LIB
        TensorImpl impl = impl_like(bundled_expected_output_tensor);
        Tensor expected = Tensor(&impl);
#endif
        // Sanity check: both tensors must hold the same number of elements.
        const int64_t nelem = expected.numel();
        if (method_output_tensor.numel() != nelem) {
          ET_LOG(Error, "Tensor size mismatch");
          return fail(Error::InvalidArgument);
        }

        // The element loop reads both buffers as float, so reject any other
        // dtype instead of silently reinterpreting the bytes.
        if (expected.scalar_type() != ::executorch::aten::ScalarType::Float ||
            method_output_tensor.scalar_type() !=
                ::executorch::aten::ScalarType::Float) {
          ET_LOG(Error, "Only float32 tensors are supported for error stats");
          return fail(Error::NotSupported);
        }

        const float* e_data = expected.data_ptr<float>();
        const float* a_data = method_output_tensor.data_ptr<float>();

        for (int64_t k = 0; k < nelem; ++k) {
          const double abs_err = std::abs(a_data[k] - e_data[k]);
          double relative_divider =
              std::max(std::abs(a_data[k]), std::abs(e_data[k]));
          relative_divider = std::max(relative_divider, eps);
          const double relative_err = abs_err / relative_divider;

          sum_abs += abs_err;
          max_abs = std::max(max_abs, abs_err);
          sum_rel += relative_err;
          max_rel = std::max(max_rel, relative_err);
        }
        total_elems += nelem;
        break;
      }
      default: {
        ET_LOG(
            Error,
            "Data type %u not supported",
            static_cast<unsigned>(bundled_expected_output->val_type()));
        return fail(Error::NotSupported);
      }
    }
  }

  // Nothing was compared; avoid dividing by zero.
  if (total_elems == 0) {
    return {Error::Ok, 0.0, 0.0, 0.0, 0.0};
  }

  const double elem_count = static_cast<double>(total_elems);
  return {
      Error::Ok,
      sum_abs / elem_count,
      max_abs,
      sum_rel / elem_count,
      max_rel};
}

ET_NODISCARD Error verify_method_outputs(
Method& method,
SerializedBundledProgram* bundled_program_ptr,
Expand Down
26 changes: 26 additions & 0 deletions devtools/bundled_program/bundled_program.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
* Copyright 2025 Arm Limited and/or its affiliates.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
Expand Down Expand Up @@ -40,6 +41,31 @@ ET_NODISCARD ::executorch::runtime::Error load_bundled_input(
SerializedBundledProgram* bundled_program_ptr,
size_t testset_idx);

/**
 * Result of compute_method_output_error_stats(): a status code plus aggregate
 * error statistics over all compared output elements. The statistics are only
 * meaningful when `status` is Error::Ok.
 */
struct ErrorStats {
// Error::Ok if the statistics below were filled in, otherwise the failure.
::executorch::runtime::Error status;
// Mean of |actual - expected| over all compared elements.
double mean_abs_error;
// Largest |actual - expected| over all compared elements.
double max_abs_error;
// Mean of the per-element relative error over all compared elements.
double mean_relative_error;
// Largest per-element relative error over all compared elements.
double max_relative_error;
};

/**
 * Compute error statistics for the Method's outputs versus the bundled
 * "expected_outputs" of the testset_idx-th test case.
 *
 * The statistics are the mean and maximum of the absolute error and of the
 * relative error, taken over all elements of all output tensors. Only float32
 * tensor outputs are handled.
 *
 * @param[in] method The Method to extract outputs from.
 * @param[in] bundled_program_ptr The bundled program that contains the
 * expected outputs.
 * @param[in] testset_idx The index of the test case whose expected outputs
 * are compared against.
 *
 * @returns ErrorStats with status set to Error::Ok if the stats are filled
 * in. Otherwise status holds the error (for example Error::InvalidArgument
 * for a buffer that is not a bundled program or an out-of-range testset_idx,
 * Error::NotSupported for a non-tensor expected output) and all stats are 0.
 */

ET_NODISCARD ErrorStats compute_method_output_error_stats(
Method& method,
SerializedBundledProgram* bundled_program_ptr,
size_t testset_idx);

/**
* Compare the Method's output with testset_idx-th bundled expected
* output in method_idx-th Method test.
Expand Down
24 changes: 23 additions & 1 deletion examples/arm/executor_runner/arm_executor_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,11 @@ using executorch::runtime::Result;
using executorch::runtime::Span;
using executorch::runtime::Tag;
using executorch::runtime::TensorInfo;
#if defined(ET_BUNDLE_IO)
using executorch::bundled_program::compute_method_output_error_stats;
using executorch::bundled_program::ErrorStats;
using executorch::bundled_program::verify_method_outputs;
#endif
#if defined(ET_EVENT_TRACER_ENABLED)
using executorch::etdump::ETDumpGen;
using executorch::etdump::ETDumpResult;
Expand Down Expand Up @@ -850,8 +855,25 @@ int main(int argc, const char* argv[]) {

#if defined(ET_BUNDLE_IO)
if (bundle_io) {
// Check result
ErrorStats stats =
compute_method_output_error_stats(*method, model_pte, testset_idx);
if (stats.status == Error::Ok) {
ET_LOG(Info, "=== Error stats for testset %d ===", testset_idx);
ET_LOG(Info, " mean_absolute_error: %f", stats.mean_abs_error);
ET_LOG(Info, " max_absolute_error: %f", stats.max_abs_error);
ET_LOG(Info, " mean_relative_error: %f", stats.mean_relative_error);
ET_LOG(Info, " max_relative_error: %f", stats.max_relative_error);
} else {
ET_LOG(
Info,
"=== Error calculating stats for testset %d ERROR:%d ===",
testset_idx,
stats.status);
}

// Verify the result.
status = executorch::bundled_program::verify_method_outputs(
status = verify_method_outputs(
*method, model_pte, testset_idx, et_rtol, et_atol);
if (status == Error::Ok) {
ET_LOG(Info, "Model output match expected BundleIO bpte ref data.");
Expand Down
Loading