-
Notifications
You must be signed in to change notification settings - Fork 668
Add manifest extension AoT #14128
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Add manifest extension AoT #14128
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
load("@fbcode_macros//build_defs:python_library.bzl", "python_library") | ||
|
||
oncall("executorch") | ||
|
||
python_library( | ||
name = "_manifest", | ||
srcs = [ | ||
"_manifest.py", | ||
], | ||
deps = [ | ||
"//executorch/exir/_serialize:lib", | ||
"//executorch/exir:_warnings", | ||
], | ||
visibility = ["PUBLIC"], | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Copyright (c) Meta Platforms, Inc. and affiliates. | ||
# All rights reserved. | ||
# | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
from executorch.extension.manifest._manifest import append_manifest, Manifest | ||
|
||
__all__ = [ | ||
"Manifest", | ||
"append_manifest", | ||
] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,206 @@ | ||
from dataclasses import dataclass | ||
from typing import ClassVar, Literal | ||
|
||
from executorch.exir._serialize.padding import padding_required | ||
from executorch.exir._warnings import experimental | ||
|
||
# Byte order of numbers written to the manifest. Always little-endian | ||
# regardless of the host system, since all commonly-used modern CPUs are little | ||
# endian. | ||
_MANIFEST_BYTEORDER: Literal["little"] = "little" | ||
|
||
|
||
@dataclass | ||
class _ManifestLayout: | ||
"""Python class mirroring the binary layout of the manifest. | ||
separate from the Manifest class, which is the user facing | ||
representation. | ||
""" | ||
|
||
EXPECTED_MAGIC: ClassVar[bytes] = b"em00" | ||
|
||
MAX_SIGNATURE_SIZE: ClassVar[int] = 512 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why is this fixed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So we can just do one load at runtime. Instead of 2 loads or a stream. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 512 should also cover the vast majority of cryptographic signature algorithms I saw. |
||
|
||
EXPECTED_MIN_LENGTH: ClassVar[int] = ( | ||
# Header magic | ||
4 | ||
# Header length | ||
+ 4 | ||
# program offset | ||
+ 8 | ||
# Padding | ||
+ 4 | ||
# signature size | ||
+ 4 | ||
) | ||
|
||
EXPECTED_MAX_LENGTH: ClassVar[int] = EXPECTED_MIN_LENGTH + MAX_SIGNATURE_SIZE | ||
|
||
signature: bytes | ||
|
||
# The actual size of the signature | ||
signature_size: int = 0 | ||
|
||
# The size of any padding required | ||
padding_size: int = 0 | ||
|
||
# Size in bytes between the top of the manifest and the start of the data it was appended to. | ||
program_offset: int = 0 | ||
|
||
# The manifest length, in bytes, read from or to be written to the binary | ||
# footer. | ||
length: int = 0 | ||
|
||
# The magic bytes read from or to be written to the binary footer. | ||
magic: bytes = EXPECTED_MAGIC | ||
|
||
def __post_init__(self): | ||
"""Post init hook to validate the manifest.""" | ||
if self.signature_size == 0: | ||
self.signature_size = len(self.signature) | ||
if self.length == 0: | ||
self.length = _ManifestLayout.EXPECTED_MIN_LENGTH + self.signature_size | ||
|
||
# Not using self.is_valid() here to deliver better error messages. | ||
if len(self.signature) > _ManifestLayout.MAX_SIGNATURE_SIZE: | ||
raise ValueError( | ||
f"Signature is too large. {self.signature_size}. Manifest only supports signatures up to {_ManifestLayout.MAX_SIGNATURE_SIZE} bytes." | ||
) | ||
if self.magic != _ManifestLayout.EXPECTED_MAGIC: | ||
raise ValueError( | ||
f"Invalid magic. Expected {_ManifestLayout.EXPECTED_MAGIC}. Got {self.magic}" | ||
) | ||
if self.length < _ManifestLayout.EXPECTED_MIN_LENGTH: | ||
raise ValueError( | ||
f"Invalid length. Expected at least {_ManifestLayout.EXPECTED_MIN_LENGTH}. Got {self.length}" | ||
) | ||
if self.length > _ManifestLayout.EXPECTED_MAX_LENGTH: | ||
raise ValueError( | ||
f"Invalid length. Expected at most {_ManifestLayout.EXPECTED_MAX_LENGTH}. Got {self.length}" | ||
) | ||
if self.signature_size != len(self.signature): | ||
raise ValueError( | ||
f"Invalid signature size must match len(self.signature). Expected {len(self.signature)}. Got {self.signature_size}" | ||
) | ||
|
||
def is_valid(self) -> bool: | ||
"""Returns true if the manifest appears to be well-formed.""" | ||
return ( | ||
self.magic == _ManifestLayout.EXPECTED_MAGIC | ||
and self.length >= _ManifestLayout.EXPECTED_MIN_LENGTH | ||
and self.length <= _ManifestLayout.EXPECTED_MAX_LENGTH | ||
and self.signature_size >= 0 | ||
and self.signature_size <= _ManifestLayout.MAX_SIGNATURE_SIZE | ||
and self.program_offset >= 0 | ||
and len(self.signature) == self.signature_size | ||
) | ||
|
||
def to_bytes(self) -> bytes: | ||
""""Returns the binary representation of the Manifest. Written bottom up | ||
to allow for BC considerations. The compatibility-preserving way to make | ||
changes is to increase the header's length field and add new fields at | ||
the top. This means we can always check the last 8 bytes for the magic | ||
and size, and then load the full footer." | ||
""" | ||
if not self.is_valid(): | ||
raise ValueError("Cannot serialize an invalid manifest") | ||
|
||
data: bytes = ( | ||
# bytes: Signature unique ID for the data the manifest was appended to. | ||
self.signature | ||
# actual size of the signature | ||
+ self.signature_size.to_bytes(4, byteorder=_MANIFEST_BYTEORDER) | ||
# uint32_t: Any padding required to align the manifest. | ||
+ self.padding_size.to_bytes(4, byteorder=_MANIFEST_BYTEORDER) | ||
# uint64_t: Size in bytes between the manifest and the data it was appended to. | ||
+ self.program_offset.to_bytes(8, byteorder=_MANIFEST_BYTEORDER) | ||
# uint32_t: Actual size of this manifest. | ||
+ self.length.to_bytes(4, byteorder=_MANIFEST_BYTEORDER) | ||
# Manifest magic. This lets consumers detect whether the | ||
# manifest was inserted or not. Always use the proper magic value | ||
# (i.e., ignore self.magic) since there's no reason to create an | ||
# invalid manifest. | ||
+ self.EXPECTED_MAGIC | ||
) | ||
return data | ||
|
||
@staticmethod | ||
def from_bytes(data: bytes) -> "_ManifestLayout": | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For large files you have to read the whole thing? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could add more methods like from file that just load the last MAX_SIZE bytes. Or stream. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I dont really expect people to be verifying the signature in python though. Its mostly just there for testing. |
||
"""Tries to read a manifest from the provided data. | ||
|
||
Does not validate that the header is well-formed. Callers should | ||
use is_valid(). | ||
|
||
Args: | ||
data: The data to read from. | ||
Returns: | ||
The contents of the serialized manifest. | ||
Raises: | ||
ValueError: If not enough data is provided. | ||
""" | ||
if len(data) <= _ManifestLayout.EXPECTED_MIN_LENGTH: | ||
raise ValueError( | ||
f"Not enough data for the manifest: {len(data)} " | ||
+ f"< {_ManifestLayout.EXPECTED_MIN_LENGTH}" | ||
) | ||
magic = data[-4:] | ||
length = int.from_bytes(data[-8:-4], byteorder=_MANIFEST_BYTEORDER) | ||
program_offset = int.from_bytes(data[-16:-8], byteorder=_MANIFEST_BYTEORDER) | ||
padding_size = int.from_bytes(data[-20:-16], byteorder=_MANIFEST_BYTEORDER) | ||
signature_size = int.from_bytes(data[-24:-20], byteorder=_MANIFEST_BYTEORDER) | ||
signature = data[-(signature_size + 24) : -24] | ||
return _ManifestLayout( | ||
signature=signature, | ||
signature_size=signature_size, | ||
padding_size=padding_size, | ||
program_offset=program_offset, | ||
length=length, | ||
magic=magic, | ||
) | ||
|
||
@staticmethod | ||
def from_manifest(manifest: "Manifest") -> "_ManifestLayout": | ||
return _ManifestLayout( | ||
signature=manifest.signature, | ||
signature_size=len(manifest.signature), | ||
length=_ManifestLayout.EXPECTED_MIN_LENGTH + len(manifest.signature), | ||
# program_offset and padding_size are set at append time. | ||
) | ||
|
||
|
||
@experimental("This API is experimental and subject to change without notice.")
@dataclass
class Manifest:
    """Metadata that can be appended to the end of a binary blob.

    The manifest carries meta information about the blob it is attached to.
    Its contents are opaque to this module: you must know who created the
    manifest to be able to interpret the data in it.

    The manifest is a mechanism, not a security policy. This class only
    stores and returns the signature bytes; nothing here verifies them.
    Consumers are expected to implement whatever verification is appropriate
    for their own threat model.

    Example (higher-layer consumer)::

        blob = append_manifest(pte_bytes, Manifest(signature=sig))
        ...
        recovered = Manifest.from_bytes(blob)
        # The consumer now checks recovered.signature against pte_bytes
        # using its own scheme.
    """

    # Unique ID for the data the manifest was appended to. Often this might
    # contain a cryptographic signature for the data.
    signature: bytes

    # NOTE(review): reviewers raised adding version / timestamp fields and
    # storing arbitrary metadata. The stated intent is a lightweight way to
    # attach security information or other core metadata; none of those
    # fields exist in this layout.

    @staticmethod
    def _from_manifest_layout(layout: _ManifestLayout) -> "Manifest":
        """Converts the binary-layout view into the user-facing view."""
        return Manifest(signature=layout.signature)

    @staticmethod
    def from_bytes(data: bytes) -> "Manifest":
        """Tries to read a manifest from the end of the provided data.

        Raises:
            ValueError: If the data cannot be parsed into a well-formed
                manifest.
        """
        parsed = _ManifestLayout.from_bytes(data)
        if parsed.is_valid():
            return Manifest._from_manifest_layout(parsed)
        raise ValueError("Cannot parse manifest from bytes")
|
||
|
||
@experimental("This API is experimental and subject to change without notice.")
def append_manifest(pte_data: bytes, manifest: Manifest, alignment: int = 16) -> bytes:
    """Appends a manifest to the provided data.

    The result is ``pte_data``, followed by zero bytes of padding (the amount
    is whatever ``padding_required(len(pte_data), alignment)`` returns),
    followed by the serialized manifest.

    Args:
        pte_data: The data to append the manifest to.
        manifest: The manifest to serialize.
        alignment: Alignment passed to ``padding_required`` to size the
            padding inserted before the manifest.
    Returns:
        A new bytes object; ``pte_data`` itself is not modified.
    Raises:
        ValueError: If the manifest cannot be serialized (e.g. the signature
            exceeds the supported size).
    """
    padding = padding_required(len(pte_data), alignment)

    manifest_layout = _ManifestLayout.from_manifest(manifest)
    manifest_layout.padding_size = padding
    # The manifest starts after the data *and* the padding. Use the freshly
    # computed padding here: reading manifest_layout.padding_size before it is
    # assigned would always contribute 0 and record an offset that is too
    # small whenever padding is inserted.
    manifest_layout.program_offset = len(pte_data) + padding

    return pte_data + (b"\x00" * padding) + manifest_layout.to_bytes()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest") | ||
|
||
oncall("executorch") | ||
|
||
python_unittest( | ||
name = "test_manifest", | ||
srcs = [ | ||
"test_manifest.py", | ||
], | ||
deps = [ | ||
"//executorch/extension/manifest:_manifest", | ||
"//executorch/extension/pybindings:portable_lib", | ||
"//executorch/exir:lib", | ||
"//caffe2:torch", | ||
], | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Write a docblock with an example usage of the manifest file for higher-layer consumers. Also mention that the manifest is a mechanism, not a security policy, and explicitly say that consumers must implement appropriate security for their threat model.