Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions extension/manifest/TARGETS
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Buck build definition for the manifest extension's Python library.
load("@fbcode_macros//build_defs:python_library.bzl", "python_library")

oncall("executorch")

# Library target wrapping _manifest.py.
python_library(
name = "_manifest",
srcs = [
"_manifest.py",
],
# _manifest.py imports executorch.exir._serialize.padding and
# executorch.exir._warnings; presumably these two targets provide them.
deps = [
"//executorch/exir/_serialize:lib",
"//executorch/exir:_warnings",
],
visibility = ["PUBLIC"],
)
12 changes: 12 additions & 0 deletions extension/manifest/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""Public entry point of the manifest extension.

Re-exports ``Manifest`` and ``append_manifest`` from the private
``_manifest`` module so callers can import them from
``executorch.extension.manifest`` directly.
"""

from executorch.extension.manifest._manifest import append_manifest, Manifest

# Names exported by `from executorch.extension.manifest import *`.
__all__ = [
"Manifest",
"append_manifest",
]
206 changes: 206 additions & 0 deletions extension/manifest/_manifest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
from dataclasses import dataclass
from typing import ClassVar, Literal

from executorch.exir._serialize.padding import padding_required
from executorch.exir._warnings import experimental

# Byte order of every integer field written to or read from the manifest.
# Always little-endian regardless of the host system, since all commonly-used
# modern CPUs are little endian. Typed as a Literal so it can be passed
# directly as the `byteorder` argument of int.to_bytes / int.from_bytes.
_MANIFEST_BYTEORDER: Literal["little"] = "little"


Comment on lines +11 to +12
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Write a docblock showing an example usage of the manifest file for higher-layer consumers. Also mention that the manifest is a mechanism, not a security policy, and explicitly say that consumers must implement appropriate security for their threat model.

# 1. Generate PTE file
pte_data = serialize_pte_binary(program)

# 2. Create cryptographic signature of PTE data
signature = sign_with_private_key(pte_data, private_key)  # e.g., RSA, ECDSA

# 3. Append manifest with signature
manifest = Manifest(signature=signature)
pte_with_manifest = append_manifest(pte_data, manifest)

Verification Process

# 1. Extract manifest from end of file
manifest = Manifest.from_bytes(file_data)

# 2. Extract PTE data (using program_offset)
pte_data = file_data[:-(manifest_length + padding)]

# 3. Verify signature with public key
is_valid = verify_signature(pte_data, manifest.signature, public_key)

@dataclass
class _ManifestLayout:
    """Python mirror of the manifest's binary layout.

    Kept separate from the ``Manifest`` class, which is the user-facing
    representation. The manifest is a footer that is serialized bottom-up, so
    the fixed-width fields always sit at the very end of the data::

        [signature:      signature_size bytes]
        [signature_size: 4 bytes]
        [padding_size:   4 bytes]
        [program_offset: 8 bytes]
        [length:         4 bytes]
        [magic:          4 bytes]   <- end of data

    All integers are unsigned and use ``_MANIFEST_BYTEORDER``.
    """

    # Magic bytes that terminate every serialized manifest.
    EXPECTED_MAGIC: ClassVar[bytes] = b"em00"

    # Upper bound on the signature length. It is fixed so that a runtime can
    # fetch the whole manifest with a single load of the trailing
    # EXPECTED_MAX_LENGTH bytes instead of two loads or a stream. 512 bytes is
    # expected to cover the vast majority of cryptographic signature
    # algorithms.
    MAX_SIGNATURE_SIZE: ClassVar[int] = 512

    # Size of the fixed-width fields, i.e. of a manifest whose signature is
    # empty.
    EXPECTED_MIN_LENGTH: ClassVar[int] = (
        # Header magic
        4
        # Header length
        + 4
        # program offset
        + 8
        # Padding
        + 4
        # signature size
        + 4
    )

    EXPECTED_MAX_LENGTH: ClassVar[int] = EXPECTED_MIN_LENGTH + MAX_SIGNATURE_SIZE

    # Raw signature bytes.
    signature: bytes

    # The actual size of the signature. 0 means "derive from len(signature)".
    signature_size: int = 0

    # The size of any padding required
    padding_size: int = 0

    # Size in bytes between the top of the manifest and the start of the data
    # it was appended to.
    program_offset: int = 0

    # The manifest length, in bytes, read from or to be written to the binary
    # footer. 0 means "derive from the signature size".
    length: int = 0

    # The magic bytes read from or to be written to the binary footer.
    magic: bytes = EXPECTED_MAGIC

    def __post_init__(self) -> None:
        """Fill in derived defaults, then validate the manifest.

        Raises:
            ValueError: If the signature is too large, the magic is wrong, the
                length is out of range, or ``signature_size`` disagrees with
                ``len(signature)``.
        """
        if self.signature_size == 0:
            self.signature_size = len(self.signature)
        if self.length == 0:
            self.length = _ManifestLayout.EXPECTED_MIN_LENGTH + self.signature_size

        # Not using self.is_valid() here to deliver better error messages.
        if len(self.signature) > _ManifestLayout.MAX_SIGNATURE_SIZE:
            # Report the measured length: it is what the condition tests, and
            # an explicitly passed signature_size may differ from it.
            raise ValueError(
                f"Signature is too large. {len(self.signature)}. Manifest only supports signatures up to {_ManifestLayout.MAX_SIGNATURE_SIZE} bytes."
            )
        if self.magic != _ManifestLayout.EXPECTED_MAGIC:
            raise ValueError(
                f"Invalid magic. Expected {_ManifestLayout.EXPECTED_MAGIC}. Got {self.magic}"
            )
        if self.length < _ManifestLayout.EXPECTED_MIN_LENGTH:
            raise ValueError(
                f"Invalid length. Expected at least {_ManifestLayout.EXPECTED_MIN_LENGTH}. Got {self.length}"
            )
        if self.length > _ManifestLayout.EXPECTED_MAX_LENGTH:
            raise ValueError(
                f"Invalid length. Expected at most {_ManifestLayout.EXPECTED_MAX_LENGTH}. Got {self.length}"
            )
        if self.signature_size != len(self.signature):
            raise ValueError(
                f"Invalid signature size must match len(self.signature). Expected {len(self.signature)}. Got {self.signature_size}"
            )

    def is_valid(self) -> bool:
        """Returns true if the manifest appears to be well-formed."""
        layout = _ManifestLayout
        return (
            self.magic == layout.EXPECTED_MAGIC
            and layout.EXPECTED_MIN_LENGTH <= self.length <= layout.EXPECTED_MAX_LENGTH
            and 0 <= self.signature_size <= layout.MAX_SIGNATURE_SIZE
            and self.program_offset >= 0
            and len(self.signature) == self.signature_size
        )

    def to_bytes(self) -> bytes:
        """Returns the binary representation of the manifest.

        Written bottom up to allow for BC considerations. The
        compatibility-preserving way to make changes is to increase the
        header's length field and add new fields at the top. This means we can
        always check the last 8 bytes for the magic and size, and then load
        the full footer.

        Raises:
            ValueError: If ``is_valid()`` is false.
        """
        if not self.is_valid():
            raise ValueError("Cannot serialize an invalid manifest")

        return b"".join(
            (
                # bytes: Signature unique ID for the data the manifest was
                # appended to.
                self.signature,
                # uint32_t: Actual size of the signature.
                self.signature_size.to_bytes(4, byteorder=_MANIFEST_BYTEORDER),
                # uint32_t: Any padding required to align the manifest.
                self.padding_size.to_bytes(4, byteorder=_MANIFEST_BYTEORDER),
                # uint64_t: Size in bytes between the manifest and the data it
                # was appended to.
                self.program_offset.to_bytes(8, byteorder=_MANIFEST_BYTEORDER),
                # uint32_t: Actual size of this manifest.
                self.length.to_bytes(4, byteorder=_MANIFEST_BYTEORDER),
                # Manifest magic. This lets consumers detect whether the
                # manifest was inserted or not. Always use the proper magic
                # value (i.e., ignore self.magic) since there's no reason to
                # create an invalid manifest.
                self.EXPECTED_MAGIC,
            )
        )

    @staticmethod
    def from_bytes(data: bytes) -> "_ManifestLayout":
        """Tries to read a manifest from the end of the provided data.

        Only the trailing bytes of ``data`` are inspected, so a caller holding
        a large file does not need to pass all of it: the last
        ``EXPECTED_MAX_LENGTH`` bytes are enough to hold any valid manifest.
        NOTE(review): per the author, Python-side parsing is mostly intended
        for testing; a from-file or streaming reader was discussed but is not
        implemented here.

        Args:
            data: The data to read from. The manifest must be at its end.
        Returns:
            The contents of the serialized manifest.
        Raises:
            ValueError: If not enough data is provided, or if the parsed
                fields fail the validation in ``__post_init__`` (wrong magic,
                out-of-range length, or a signature whose size does not match
                the declared ``signature_size``).
        """
        fixed_size = _ManifestLayout.EXPECTED_MIN_LENGTH
        # Strictly less-than: a buffer of exactly EXPECTED_MIN_LENGTH bytes is
        # a complete manifest with an empty signature and must be accepted.
        if len(data) < fixed_size:
            raise ValueError(
                f"Not enough data for the manifest: {len(data)} "
                + f"< {_ManifestLayout.EXPECTED_MIN_LENGTH}"
            )
        # Fixed-width fields, read from the end of the data backwards.
        magic = data[-4:]
        length = int.from_bytes(data[-8:-4], byteorder=_MANIFEST_BYTEORDER)
        program_offset = int.from_bytes(data[-16:-8], byteorder=_MANIFEST_BYTEORDER)
        padding_size = int.from_bytes(data[-20:-16], byteorder=_MANIFEST_BYTEORDER)
        signature_size = int.from_bytes(data[-24:-20], byteorder=_MANIFEST_BYTEORDER)
        # If signature_size claims more bytes than are available the slice is
        # silently shortened; __post_init__ then rejects the size mismatch.
        signature = data[-(signature_size + fixed_size) : -fixed_size]
        return _ManifestLayout(
            signature=signature,
            signature_size=signature_size,
            padding_size=padding_size,
            program_offset=program_offset,
            length=length,
            magic=magic,
        )

    @staticmethod
    def from_manifest(manifest: "Manifest") -> "_ManifestLayout":
        """Builds the binary layout for a user-facing ``Manifest``.

        Args:
            manifest: The manifest whose signature should be serialized.
        Returns:
            A layout with ``program_offset`` and ``padding_size`` left at 0.
        """
        return _ManifestLayout(
            signature=manifest.signature,
            signature_size=len(manifest.signature),
            length=_ManifestLayout.EXPECTED_MIN_LENGTH + len(manifest.signature),
            # program_offset and padding_size are set at append time.
        )


@experimental("This API is experimental and subject to change without notice.")
@dataclass
class Manifest:
    """Meta information that can be appended to a binary blob.

    The manifest describes the blob it is attached to. Interpreting its
    contents requires knowing who produced the manifest.
    """

    # NOTE(review): open design questions raised in review: whether to add a
    # format `version` and an optional `timestamp` field, or to generalize
    # into a typed payload with free-form attributes. The author's stated
    # intent is a lightweight way to attach security information or other core
    # metadata about the .pte; arbitrary user-defined metadata (e.g. a
    # tokenizer.json location) was considered better served by a separate
    # container such as a zip.

    # Unique ID for the data the manifest was appended to. Often this might
    # contain a cryptographic signature for the data.
    signature: bytes

    @staticmethod
    def _from_manifest_layout(layout: _ManifestLayout) -> "Manifest":
        """Converts a parsed binary layout into the user-facing manifest."""
        return Manifest(signature=layout.signature)

    @staticmethod
    def from_bytes(data: bytes) -> "Manifest":
        """Tries to read a manifest from the provided data.

        Raises:
            ValueError: If the data cannot be parsed into a well-formed
                manifest.
        """
        parsed = _ManifestLayout.from_bytes(data)
        if parsed.is_valid():
            return Manifest._from_manifest_layout(parsed)
        raise ValueError("Cannot parse manifest from bytes")


@experimental("This API is experimental and subject to change without notice.")
def append_manifest(pte_data: bytes, manifest: Manifest, alignment: int = 16) -> bytes:
    """Appends a manifest to the provided data.

    The result is ``pte_data``, followed by zero bytes of padding, followed by
    the serialized manifest.

    Args:
        pte_data: The data to append the manifest to.
        manifest: The manifest to serialize.
        alignment: Alignment passed to ``padding_required`` together with
            ``len(pte_data)`` to decide how many padding bytes to insert.
    Returns:
        The padded data with the serialized manifest at the end.
    Raises:
        ValueError: If the manifest cannot be serialized.
    """
    padding = padding_required(len(pte_data), alignment)

    manifest_layout = _ManifestLayout.from_manifest(manifest)
    # Set the padding before deriving the offset. The manifest starts after
    # the data *and* its padding, so the offset back to the start of the data
    # must include both. Reading padding_size before assigning it would use
    # its default of 0 and silently drop the padding from the offset.
    manifest_layout.padding_size = padding
    manifest_layout.program_offset = len(pte_data) + padding

    return pte_data + (b"\x00" * padding) + manifest_layout.to_bytes()
16 changes: 16 additions & 0 deletions extension/manifest/test/TARGETS
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Buck build definition for the manifest extension's unit tests.
load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest")

oncall("executorch")

# Unit-test target wrapping test_manifest.py.
python_unittest(
name = "test_manifest",
srcs = [
"test_manifest.py",
],
# The library under test plus what the test file presumably imports
# (pybindings, exir, torch) -- TODO confirm against test_manifest.py.
deps = [
"//executorch/extension/manifest:_manifest",
"//executorch/extension/pybindings:portable_lib",
"//executorch/exir:lib",
"//caffe2:torch",
],
)
Loading
Loading