IOManager Interface

JacobSzwejbka · facebook-github-bot · commit 934cad43b477 · 2025-04-23T15:47:46.000-07:00
Summary:
Hopefully this is sufficient for the contract.

Two follow-up changes will exercise this interface:

Add a basic CPU implementation.

Add a static attention implementation.

Differential Revision: D73450877
diff --git a/extension/llm/runner/io_manager/TARGETS b/extension/llm/runner/io_manager/TARGETS
@@ -0,0 +1,8 @@
+# Any targets that should be shared between fbcode and xplat must be defined in
+# targets.bzl. This file can contain fbcode-only targets.
+
+load(":targets.bzl", "define_common_targets")
+
+oncall("executorch")
+
+define_common_targets()  # declares the shared targets defined in the sibling targets.bzl
diff --git a/extension/llm/runner/io_manager/io_manager.h b/extension/llm/runner/io_manager/io_manager.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <executorch/extension/tensor/tensor.h>
+#include <executorch/runtime/executor/method.h>
+#include <executorch/runtime/executor/method_meta.h>
+
+namespace executorch {
+namespace extension {
+namespace llm {
+
+/**
+ * @brief Abstract interface for the IO plumbing around LLM inference.
+ *
+ * Declares the hooks that prepare the inputs for, and consume the outputs of,
+ * the prefill and decode methods. All hooks are pure virtual.
+ */
+class ET_EXPERIMENTAL IOManagerBase {
+ public:
+  /** Virtual destructor so that derived classes are cleaned up properly. */
+  ET_EXPERIMENTAL virtual ~IOManagerBase() = default;
+
+  /**
+   * @brief Initialize the IO manager for a prefill/decode method pair.
+   *
+   * @param prefill_method The prefill method to initialize with.
+   * @param decode_method The decode method to initialize with.
+   * @return A runtime::Error status; the declaration is ET_NODISCARD.
+   */
+  ET_EXPERIMENTAL ET_NODISCARD virtual runtime::Error init(
+      runtime::Method& prefill_method,
+      runtime::Method& decode_method) = 0;
+
+  /**
+   * @brief Reset the IO manager state.
+   *
+   * @param prefill_method The prefill method to reset with.
+   * @param decode_method The decode method to reset with.
+   * @return A runtime::Error status; the declaration is ET_NODISCARD.
+   */
+  ET_EXPERIMENTAL ET_NODISCARD virtual runtime::Error reset(
+      runtime::Method& prefill_method,
+      runtime::Method& decode_method) = 0;
+
+  /**
+   * @brief Build the inputs for the prefill phase of LLM inference.
+   *
+   * @param input Tensor containing the token IDs.
+   * @param start_pos Tensor with the input's starting position in the context.
+   * @param prefill_method The prefill method the inputs are prepared for.
+   * @return A runtime::Result wrapping the vector of EValues for prefill.
+   */
+  ET_EXPERIMENTAL virtual runtime::Result<std::vector<runtime::EValue>>
+  prepare_prefill(
+      const TensorPtr& input,
+      const TensorPtr& start_pos,
+      runtime::Method& prefill_method) = 0;
+
+  /**
+   * @brief Build the inputs for the decode phase of LLM inference.
+   *
+   * @param input Tensor containing the token IDs.
+   * @param start_pos Tensor with the input's starting position in the context.
+   * @param decode_method The decode method the inputs are prepared for.
+   * @return A runtime::Result wrapping the vector of EValues for decode.
+   */
+  ET_EXPERIMENTAL virtual runtime::Result<std::vector<runtime::EValue>>
+  prepare_decode(
+      const TensorPtr& input,
+      const TensorPtr& start_pos,
+      runtime::Method& decode_method) = 0;
+
+  /**
+   * @brief Update internal state with the outputs of the prefill phase.
+   *
+   * @param prefill_method The prefill method that produced the outputs.
+   * @param model_outputs Outputs from executing the prefill method.
+   * @return A runtime::Error status; the declaration is ET_NODISCARD.
+   */
+  ET_EXPERIMENTAL ET_NODISCARD virtual runtime::Error update_prefill(
+      runtime::Method& prefill_method,
+      const std::vector<runtime::EValue>& model_outputs) = 0;
+
+  /**
+   * @brief Update internal state with the outputs of the decode phase.
+   *
+   * @param decode_method The decode method (const here, unlike update_prefill).
+   * @param model_outputs Outputs from executing the decode method.
+   * @return A runtime::Error status; the declaration is ET_NODISCARD.
+   */
+  ET_EXPERIMENTAL ET_NODISCARD virtual runtime::Error update_decode(
+      const runtime::Method& decode_method,
+      const std::vector<runtime::EValue>& model_outputs) = 0;
+};
+
+} // namespace llm
+} // namespace extension
+} // namespace executorch
diff --git a/extension/llm/runner/io_manager/targets.bzl b/extension/llm/runner/io_manager/targets.bzl
@@ -0,0 +1,12 @@
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+
+def define_common_targets():
+    runtime.cxx_library(
+        name = "io_manager",
+        exported_headers = ["io_manager.h"],
+        exported_deps = [
+            "//executorch/extension/tensor:tensor",  # for tensor.h, included by io_manager.h
+            "//executorch/runtime/executor:program_no_prim_ops",  # for method.h / method_meta.h
+        ],
+        visibility = ["@EXECUTORCH_CLIENTS"],
+    )