Skip to content

Commit

Permalink
Add log.h
Browse files Browse the repository at this point in the history
Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
  • Loading branch information
larryliu0820 committed Dec 4, 2024
1 parent a875876 commit acad533
Show file tree
Hide file tree
Showing 4 changed files with 369 additions and 27 deletions.
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ project(Tokenizers)

option(TOKENIZERS_BUILD_TEST "Build tests" OFF)

# Ignore weak attribute warning
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-attributes")

set(ABSL_ENABLE_INSTALL ON)
set(ABSL_PROPAGATE_CXX_STD ON)
set(_pic_flag ${CMAKE_POSITION_INDEPENDENT_CODE})
Expand Down
30 changes: 11 additions & 19 deletions include/base64.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,8 @@ inline Error validate(uint32_t v) {
}

inline Error decode(const std::string_view &input, std::string &output) {
if (input.size() != 4) {
fprintf(stderr, "input length must be 4, got %zu", input.size());
return Error::Base64DecodeFailure;
}
TK_CHECK_OR_RETURN_ERROR(input.size() == 4, Base64DecodeFailure,
"input length must be 4, got %zu", input.size());

uint32_t val = 0;

Expand Down Expand Up @@ -104,10 +102,8 @@ inline Error decode(const std::string_view &input, std::string &output) {

inline Error decode_1_padding(const std::string_view &input,
std::string &output) {
if (input.size() != 3) {
fprintf(stderr, "input length must be 3, got %zu", input.size());
return Error::Base64DecodeFailure;
}
TK_CHECK_OR_RETURN_ERROR(input.size() == 3, Base64DecodeFailure,
"input length must be 3, got %zu", input.size());

uint32_t val = 0;

Expand All @@ -133,7 +129,8 @@ inline Error decode_1_padding(const std::string_view &input,

inline Error decode_2_padding(const std::string_view &input,
std::string &output) {
TK_CHECK_OR_RETURN_ERROR(input.size() == 2, Base64DecodeFailure);
TK_CHECK_OR_RETURN_ERROR(input.size() == 2, Base64DecodeFailure,
"input length must be 2, got %zu", input.size());

uint32_t val = 0;

Expand All @@ -154,18 +151,13 @@ inline Error decode_2_padding(const std::string_view &input,
} // namespace detail

inline tokenizers::Result<std::string> decode(const std::string_view &input) {
if (input.empty()) {
fprintf(stderr, "empty input");
return Error::Base64DecodeFailure;
}
TK_CHECK_OR_RETURN_ERROR(!input.empty(), Base64DecodeFailure, "empty input");

// Faster than `input.size() % 4`.
if ((input.size() & 3) != 0 || input.size() < 4) {
fprintf(stderr,
"input length must be larger than 4 and is multiple of 4, got %zu",
input.size());
return Error::Base64DecodeFailure;
}
TK_CHECK_OR_RETURN_ERROR(
(input.size() & 3) == 0 && input.size() >= 4, Base64DecodeFailure,
"input length must be larger than 4 and is multiple of 4, got %zu",
input.size());

std::string output;
output.reserve(input.size() / 4 * 3);
Expand Down
89 changes: 81 additions & 8 deletions include/error.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

#pragma once

#include "log.h"
#include <stdint.h>

namespace tokenizers {
Expand Down Expand Up @@ -59,11 +60,14 @@ enum class Error : error_code_t {
* On failure, logs message__ via TK_LOG(Error, ...) before returning.
* @param[in] cond__ The condition to be checked, asserted as true.
* @param[in] error__ Error enum value to return without the `Error::` prefix,
* like `InvalidArgument`.
* like `Base64DecodeFailure`.
* @param[in] message__ Format string for the log error message.
* @param[in] ... Optional additional arguments for the format string.
*/
#define TK_CHECK_OR_RETURN_ERROR(cond__, error__) \
#define TK_CHECK_OR_RETURN_ERROR(cond__, error__, message__, ...) \
{ \
if (!(cond__)) { \
TK_LOG(Error, message__, ##__VA_ARGS__); \
return ::tokenizers::Error::error__; \
} \
}
Expand All @@ -72,11 +76,80 @@ enum class Error : error_code_t {
* If error__ is not Error::Ok, return the specified Error
* If a format string is supplied, it is logged via TK_LOG(Error, ...) before
* returning.
* @param[in] error__ Error enum value to return without the `Error::` prefix,
* like `InvalidArgument`.
* like `Base64DecodeFailure`.
* @param[in] ... Optional format string for the log error message and its
* arguments.
*/
#define TK_CHECK_OK_OR_RETURN_ERROR(error__) \
{ \
if (error__ != ::tokenizers::Error::Ok) { \
return error__; \
#define TK_CHECK_OK_OR_RETURN_ERROR(error__, ...) \
TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR(error__, ##__VA_ARGS__)

// Internal only: Use TK_CHECK_OK_OR_RETURN_ERROR() instead.
#define TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR(...) \
TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_SELECT(__VA_ARGS__, 10, 9, 8, 7, 6, 5, \
4, 3, 2, 1) \
(__VA_ARGS__)

/**
* Internal only: Use TK_CHECK_OK_OR_RETURN_ERROR() instead.
* This macro selects the correct version of
* TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR based on the number of arguments passed.
* It uses a trick with the preprocessor to count the number of arguments and
* then selects the appropriate macro.
*
* The macro accepts any number of arguments (up to 10) via __VA_ARGS__ and
* appends the argument count to TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_ to form
* the name of the macro to invoke. The count is determined by
* TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_SELECT, which receives the arguments
* followed by a descending sequence of numbers (10, 9, ..., 2, 1). The
* arguments shift that sequence so that parameter N lands on the number equal
* to the count of arguments provided.
*
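* If two arguments are passed, TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_2 is
* selected, suitable for cases where an error code and a custom message are
* provided. If only one argument is passed,
* TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_1 is selected, which is used for cases
* with just an error code. Three to ten arguments (a message plus its format
* arguments) select TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_3 through _10, which
* all forward to TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_2.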
*
* Usage:
* TK_CHECK_OK_OR_RETURN_ERROR(error_code); // Calls v1
* TK_CHECK_OK_OR_RETURN_ERROR(error_code, "Error message", ...); // Calls v2
*/
#define TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_SELECT(_1, _2, _3, _4, _5, _6, \
_7, _8, _9, _10, N, ...) \
TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_##N

// Internal only: Use TK_CHECK_OK_OR_RETURN_ERROR() instead.
#define TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_1(error__) \
do { \
const auto et_error__ = (error__); \
if (et_error__ != ::tokenizers::Error::Ok) { \
return et_error__; \
} \
}
} while (0)

// Internal only: Use TK_CHECK_OK_OR_RETURN_ERROR() instead.
#define TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_2(error__, message__, ...) \
do { \
const auto et_error__ = (error__); \
if (et_error__ != ::tokenizers::Error::Ok) { \
TK_LOG(Error, message__, ##__VA_ARGS__); \
return et_error__; \
} \
} while (0)

// Internal only: Use TK_CHECK_OK_OR_RETURN_ERROR() instead.
#define TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_3 \
TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_2
#define TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_4 \
TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_2
#define TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_5 \
TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_2
#define TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_6 \
TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_2
#define TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_7 \
TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_2
#define TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_8 \
TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_2
#define TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_9 \
TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_2
#define TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_10 \
TK_INTERNAL_CHECK_OK_OR_RETURN_ERROR_2
Loading

0 comments on commit acad533

Please sign in to comment.