Merged

Changes from all commits · 39 commits
5350547
Support preprocess_multimethod with extracted_share_data in Neuropilo…
neuropilot-captain May 12, 2025
705f94e
Support weight sharing in MTK Runtime
neuropilot-captain May 26, 2025
e8e7429
Apply lintrunner
neuropilot-captain May 26, 2025
cbcb919
Remove dependency on getPaddedSize
neuropilot-captain May 27, 2025
a0bfa5d
Add shared weights flow to llama export script
neuropilot-captain Jun 3, 2025
a6da626
Refine code
neuropilot-captain Jun 3, 2025
7e6a7d6
Merge pull request #3 from neuropilot-captain/extract_share_runtime
neuropilot-captain Jun 3, 2025
39f3c5c
Added support for Qwen, Phi, Gemma & Whisper
neuropilot-captain Aug 26, 2025
56b19fb
Fix lintrunner errors
neuropilot-captain Aug 28, 2025
2f4b9ad
Bug fix, lintrunner error fix & qwen3 gemma2 runner support
neuropilot-captain Sep 1, 2025
84c81a3
Fix backend IO order bug
neuropilot-captain Sep 1, 2025
486dd4e
First working llama shared weights flow
neuropilot-captain Sep 1, 2025
7cc7321
Merge remote-tracking branch 'upstream/main' into extract_share
neuropilot-captain Sep 5, 2025
b2303b2
Merge branch 'main' into extract_share
neuropilot-captain Sep 5, 2025
dbe864d
Fix conflict
neuropilot-captain Sep 5, 2025
6491161
Update for delegate interface changes
neuropilot-captain Sep 5, 2025
40d6d43
Fix gemma3 AoT SWA Mask
neuropilot-captain Sep 7, 2025
62352f4
Added platform-config argument
neuropilot-captain Sep 7, 2025
34e286b
Update runner support for varying number of inputs
neuropilot-captain Sep 7, 2025
d417b06
Merge remote-tracking branch 'origin/extract_share' into support_qwen…
neuropilot-captain Sep 7, 2025
781e953
Support share weights for phi, gemma, qwen
neuropilot-captain Sep 7, 2025
6b30094
Fix lint errors and update llama sample run script
neuropilot-captain Sep 8, 2025
ea5cd4d
Merge remote-tracking branch 'origin/extract_share' into support_qwen…
neuropilot-captain Sep 9, 2025
fd52664
Update runner for weights sharing
neuropilot-captain Sep 9, 2025
14ce449
Revert to fix conflict
neuropilot-captain Sep 9, 2025
0a28404
Merge branch 'main' into support_qwen_phi_gemma_whisper
neuropilot-captain Sep 9, 2025
a427f62
Merge branch 'main' into support_qwen_phi_gemma_whisper
neuropilot-captain Sep 9, 2025
018e574
Fix conflicts
neuropilot-captain Sep 9, 2025
f481c2c
Fix lint errors
neuropilot-captain Sep 10, 2025
31ac425
Merge branch 'main' into support_qwen_phi_gemma_whisper
neuropilot-captain Sep 10, 2025
855a143
Merge branch 'main' into support_qwen_phi_gemma_whisper
neuropilot-captain Sep 10, 2025
f09ffd3
Fix lint-url errors
neuropilot-captain Sep 11, 2025
a8ccdb6
Fix lint-url error
neuropilot-captain Sep 11, 2025
4f65d18
Merge branch 'main' into support_qwen_phi_gemma_whisper
neuropilot-captain Sep 12, 2025
8e237e2
fix ci lint error with workaround patch
neuropilot-captain Sep 12, 2025
ddc8ce8
Merge branch 'main' into support_qwen_phi_gemma_whisper
neuropilot-captain Sep 12, 2025
97e73d2
remove large files for lint
neuropilot-captain Sep 12, 2025
66cad81
Remove large files
neuropilot-captain Sep 12, 2025
ed29c7d
Add gemma3 and phi4 run.sh
neuropilot-captain Sep 12, 2025
2 changes: 1 addition & 1 deletion backends/mediatek/preprocess.py
@@ -26,7 +26,7 @@
HEADER_SIZE = 13
HEADER_VERSION = 1
REQUIRED_COMPILE_SPEC_KEYS = {"platform-config"}
SUPPORTED_PLATFORM_CONFIGS = {"mt6989", "mt6991"}
SUPPORTED_PLATFORM_CONFIGS = {"mt6989", "mt6991", "mt6993"}


def assert_default_dim_order(edge_graph_module: torch.fx.GraphModule) -> None:
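Note: the new "mt6993" entry extends the platforms accepted by the required "platform-config" compile spec. A minimal AoT sketch of selecting it, assuming the NeuropilotPartitioner wiring used by the other MediaTek examples (the wiring itself is not part of this diff):

    from executorch.backends.mediatek import NeuropilotPartitioner
    from executorch.exir.backend.compile_spec_schema import CompileSpec

    # The "platform-config" value must name an entry in SUPPORTED_PLATFORM_CONFIGS.
    compile_specs = [CompileSpec("platform-config", b"mt6993")]
    partitioner = NeuropilotPartitioner(compile_specs)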
2,385 changes: 2,385 additions & 0 deletions backends/mediatek/runtime/include/api/NeuronAdapter.h

Large diffs are not rendered by default.

74 changes: 73 additions & 1 deletion examples/mediatek/CMakeLists.txt
@@ -122,6 +122,11 @@ if(${ANDROID})
${EXTENSIONS_LLM_DIR}/tokenizers/third-party/abseil-cpp
)
set(THIRD_PARTY_RE2_DIR ${EXTENSIONS_LLM_DIR}/tokenizers/third-party/re2)
set(THIRD_PARTY_JSON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../third-party/json)
set(THIRD_PARTY_UNICODE_DIR
${EXTENSIONS_LLM_DIR}/tokenizers/third-party/llama.cpp-unicode
)
set(THIRD_PARTY_PCRE2_DIR ${EXTENSIONS_LLM_DIR}/tokenizers/third-party/pcre2)
set(ABSL_ENABLE_INSTALL ON)
set(ABSL_PROPAGATE_CXX_STD ON)
set(_pic_flag ${CMAKE_POSITION_INDEPENDENT_CODE})
@@ -134,9 +139,22 @@
${THIRD_PARTY_RE2_DIR}
${CMAKE_CURRENT_BINARY_DIR}/tokenizers/third-party/re2
)
add_subdirectory(
${THIRD_PARTY_JSON_DIR}
${CMAKE_CURRENT_BINARY_DIR}/tokenizers/third-party/json
)
add_subdirectory(
${THIRD_PARTY_UNICODE_DIR}
${CMAKE_CURRENT_BINARY_DIR}/tokenizers/third-party/llama.cpp-unicode
)
add_subdirectory(
${THIRD_PARTY_PCRE2_DIR}
${CMAKE_CURRENT_BINARY_DIR}/tokenizers/third-party/pcre2
)
set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag})

# Build tokenizers
set(SUPPORT_REGEX_LOOKAHEAD ON)
set(LLAMA2_TOKENIZER_DIR ${EXTENSIONS_LLM_DIR}/tokenizers)
add_library(tokenizer STATIC)
target_include_directories(
@@ -147,8 +165,12 @@ if(${ANDROID})
${LLAMA2_TOKENIZER_DIR}/include
${CMAKE_CURRENT_BINARY_DIR}/tokenizers/third-party/pcre2
${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
${THIRD_PARTY_JSON_DIR}
${THIRD_PARTY_UNICODE_DIR}/include
${THIRD_PARTY_PCRE2_DIR}
)
target_link_libraries(tokenizer PRIVATE re2::re2)

target_sources(
tokenizer
PRIVATE
@@ -157,9 +179,55 @@
${LLAMA2_TOKENIZER_DIR}/src/regex.cpp
${LLAMA2_TOKENIZER_DIR}/src/bpe_tokenizer_base.cpp
${LLAMA2_TOKENIZER_DIR}/src/re2_regex.cpp
${LLAMA2_TOKENIZER_DIR}/src/hf_tokenizer.cpp
${LLAMA2_TOKENIZER_DIR}/src/pre_tokenizer.cpp
${LLAMA2_TOKENIZER_DIR}/src/token_decoder.cpp
${LLAMA2_TOKENIZER_DIR}/src/normalizer.cpp
${LLAMA2_TOKENIZER_DIR}/third-party/llama.cpp-unicode/src/unicode.cpp
${LLAMA2_TOKENIZER_DIR}/third-party/llama.cpp-unicode/src/unicode-data.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../models/llama/tokenizer/llama_tiktoken.cpp
)

# Add support for regex_lookahead
set(PCRE2_STATIC_PIC ON)
set(PCRE2_BUILD_PCRE2_8 ON)
set(PCRE2_BUILD_PCRE2_16 OFF)
set(PCRE2_BUILD_PCRE2_32 OFF)
set(PCRE2_BUILD_TESTS OFF)
set(PCRE2_BUILD_PCRE2GREP OFF)
set(PCRE2_BUILD_PCRE2TEST OFF)
set(PCRE2_BUILD_PCRE2GPERF OFF)
set(PCRE2_BUILD_DOCS OFF)
set(PCRE2_BUILD_LIBPCRE2_PDB OFF)

# Set the INTERFACE_INCLUDE_DIRECTORIES property for pcre2-8-static
set_target_properties(
pcre2-8-static
PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/tokenizers/third-party/pcre2>
)
add_library(
regex_lookahead STATIC
${LLAMA2_TOKENIZER_DIR}/src/pcre2_regex.cpp
${LLAMA2_TOKENIZER_DIR}/src/regex_lookahead.cpp
${LLAMA2_TOKENIZER_DIR}/src/std_regex.cpp
)
add_library(tokenizer::regex_lookahead ALIAS regex_lookahead)
target_link_libraries(regex_lookahead PUBLIC pcre2-8-static)
target_include_directories(
regex_lookahead
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
)
target_link_libraries(tokenizer PUBLIC regex_lookahead)
install(
TARGETS regex_lookahead pcre2-8-static
EXPORT tokenizers-targets
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
)

# Include directory for neuron headers
include_directories(
BEFORE ${_common_include_directories}
@@ -174,7 +242,11 @@

target_link_libraries(
mtk_llama_executor_runner ${_executor_runner_libs} neuron_backend gflags
mtk_llama_executor_lib tokenizer
mtk_llama_executor_lib
)
target_link_libraries(
mtk_llama_executor_runner tokenizer
$<LINK_LIBRARY:WHOLE_ARCHIVE,regex_lookahead>
)
target_compile_options(
mtk_llama_executor_runner PUBLIC ${_common_compile_options}
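Note on the linking change above: wrapping regex_lookahead in $<LINK_LIBRARY:WHOLE_ARCHIVE,...> forces the linker to keep every object file in the static library. This is the usual way to preserve code that registers itself through static initializers and would otherwise be discarded as unreferenced.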
6 changes: 6 additions & 0 deletions examples/mediatek/aot_utils/llm_utils/loggingprinter.py
@@ -1,3 +1,9 @@
# Copyright (c) MediaTek Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import sys


6 changes: 6 additions & 0 deletions examples/mediatek/aot_utils/llm_utils/preformatter.py
@@ -1,3 +1,9 @@
# Copyright (c) MediaTek Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import json
import os
from typing import Union
@@ -0,0 +1,6 @@
{
"description": "Template used by gemma.",
"prompt_input": "<start_of_turn>user\n{instruction}<end_of_turn>\n<start_of_turn>model\n",
"prompt_no_input": "<start_of_turn>user\n{instruction}<end_of_turn>\n<start_of_turn>model\n",
"response_split": "<start_of_turn>model\n"
}
@@ -0,0 +1,6 @@
{
"description": "Template used by Phi-3.",
"prompt_input": "<|system|>\nYou are a helpful AI assistant. Please provide safe, ethical and accurate information to the user.\n<|user|>\n {instruction} \n <|assistant|>",
"prompt_no_input": "<|system|>\nYou are a helpful AI assistant. Please provide safe, ethical and accurate information to the user.\n<|user|>\n {instruction} \n <|assistant|>",
"response_split": "<|assistant|>"
}
@@ -0,0 +1,6 @@
{
"description": "Template used by Phi-4.",
"prompt_input": "<|system|>Your name is Phi, an AI expert developed by Microsoft.<|end|><|user|>{instruction}<|end|><|assistant|>",
"prompt_no_input": "<|system|>Your name is Phi, an AI expert developed by Microsoft.<|end|><|user|>{instruction}<|end|><|assistant|>",
"response_split": "<|assistant|>"
}
@@ -0,0 +1,6 @@
{
"description": "Template used by Qwen.",
"prompt_input": "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n",
"prompt_no_input": "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n",
"response_split": "<|im_start|>assistant\n"
}
@@ -0,0 +1,6 @@
{
"description": "Template used by Qwen3.",
"prompt_input": "<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n",
"prompt_no_input": "<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n",
"response_split": "assistant\n"
}
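All five templates above share one shape: {instruction} is substituted into prompt_input / prompt_no_input, and response_split marks where the generated reply is separated from the decoded output. A minimal usage sketch (the loader and the file name are illustrative, not part of this PR):

    import json

    def apply_template(template_path: str, instruction: str) -> str:
        # Substitute the user instruction into the model's prompt template.
        with open(template_path) as f:
            template = json.load(f)
        return template["prompt_no_input"].format(instruction=instruction)

    prompt = apply_template("qwen3_template.json", "What is the capital of France?")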
13 changes: 13 additions & 0 deletions examples/mediatek/aot_utils/llm_utils/sanity_checks.py
@@ -1,3 +1,9 @@
# Copyright (c) MediaTek Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import os
import sys

@@ -127,7 +133,14 @@ def check_supported_model(config):
"qwen",
"qwen1.5",
"qwen2",
"qwen3",
"milm",
"phi3",
"phi4",
"gemma1",
"gemma2",
"gemma3",
"whisper",
]
if not isinstance(config, BaseConfig):
raise RuntimeError(
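For reference, the list above feeds a simple membership gate; an illustrative stand-alone version (the function name and message are assumptions, the real check lives in check_supported_model):

    # Entries visible in this hunk; names above the diff context are omitted.
    SUPPORTED_MODELS = {
        "qwen", "qwen1.5", "qwen2", "qwen3", "milm",
        "phi3", "phi4", "gemma1", "gemma2", "gemma3", "whisper",
    }

    def assert_supported(model_name: str) -> None:
        if model_name.lower() not in SUPPORTED_MODELS:
            raise RuntimeError(f"Unsupported model: {model_name}")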