|  | 
|  | 1 | +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ | 
|  | 2 | + | 
|  | 3 | +/*  Fluent Bit | 
|  | 4 | + *  ========== | 
|  | 5 | + *  Copyright (C) 2019      The Fluent Bit Authors | 
|  | 6 | + *  Copyright (C) 2015-2018 Treasure Data Inc. | 
|  | 7 | + * | 
|  | 8 | + *  Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | 9 | + *  you may not use this file except in compliance with the License. | 
|  | 10 | + *  You may obtain a copy of the License at | 
|  | 11 | + * | 
|  | 12 | + *      http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 13 | + * | 
|  | 14 | + *  Unless required by applicable law or agreed to in writing, software | 
|  | 15 | + *  distributed under the License is distributed on an "AS IS" BASIS, | 
|  | 16 | + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | 17 | + *  See the License for the specific language governing permissions and | 
|  | 18 | + *  limitations under the License. | 
|  | 19 | + */ | 
|  | 20 | + | 
|  | 21 | +#include <stdio.h> | 
|  | 22 | + | 
|  | 23 | +#include <string.h> | 
|  | 24 | +#include <time.h> | 
|  | 25 | +#include <ctype.h> | 
|  | 26 | + | 
|  | 27 | +#include <fluent-bit/flb_macros.h> | 
|  | 28 | +#include <fluent-bit/flb_config.h> | 
|  | 29 | +#include <fluent-bit/flb_error.h> | 
|  | 30 | +#include <fluent-bit/flb_mem.h> | 
|  | 31 | +#include <fluent-bit/flb_str.h> | 
|  | 32 | +#include <fluent-bit/flb_encoding.h> | 
|  | 33 | + | 
|  | 34 | + | 
|  | 35 | +#include <tutf8e.h> | 
|  | 36 | + | 
/*
 *  flb_encoding_open(encoding):
 *
 *  <encoding> is a charset name, e.g. iso-8859-1, windows-1251,
 *  windows-1252, ..., optionally followed by an option that selects
 *  how bad (invalid) input characters are handled:
 *
 *  <charset>                          - fail if bad chars
 *  <charset>//IGNORE       or //I     - ignore bad chars
 *  <charset>//REPLACEMENT  or //R     - use unicode replacement chars for bad chars
 *  <charset>//QUESTION     or //Q     - use '?' for bad chars
 *  <charset>///<str>                  - use <str> for bad chars
 */
|  | 49 | + | 
|  | 50 | + | 
/*
 * UTF-8 byte sequence of the Unicode replacement character (U+FFFD),
 * NUL-terminated so it can be handed around as a C string.
 */
static unsigned char replacement_utf8[] = {
    0xEF, 0xBF, 0xBD,   /* U+FFFD encoded as UTF-8 */
    0x00                /* string terminator */
};
|  | 52 | + | 
|  | 53 | +struct flb_encoding *flb_encoding_open(const char *encoding) { | 
|  | 54 | +    struct flb_encoding *ec; | 
|  | 55 | +    TUTF8encoder encoder; | 
|  | 56 | +    const char *invalid; | 
|  | 57 | +    char *opt; | 
|  | 58 | + | 
|  | 59 | +    if ((opt = strstr(encoding,"//")) != NULL) { | 
|  | 60 | +        *opt = 0; | 
|  | 61 | +        opt += 2; | 
|  | 62 | +        if (*opt == '/') { | 
|  | 63 | +            invalid = opt + 1; | 
|  | 64 | +        } else if (!strcmp(opt,"I") || !strcmp(opt,"IGNORE")) { | 
|  | 65 | +            invalid = ""; | 
|  | 66 | +        } else if (!strcmp(opt,"R") || !strcmp(opt,"REPLACEMENT")) { | 
|  | 67 | +            invalid = (const char *) replacement_utf8; | 
|  | 68 | +        } else if (!strcmp(opt,"Q") || !strcmp(opt,"QUESTION")) { | 
|  | 69 | +            invalid = "?"; | 
|  | 70 | +        } else { | 
|  | 71 | +            flb_error("[flb_encoding] unknown encodig option: %s", opt); | 
|  | 72 | +        } | 
|  | 73 | +    } else { | 
|  | 74 | +        invalid = NULL; | 
|  | 75 | +    } | 
|  | 76 | +     | 
|  | 77 | +    if ((encoder = tutf8e_encoder(encoding)) == NULL) { | 
|  | 78 | +        flb_error("[flb_encoding] unknown encoding: %s", encoding); | 
|  | 79 | +        return NULL; | 
|  | 80 | +    } | 
|  | 81 | +    ec =   flb_calloc(sizeof(struct flb_encoding),1); | 
|  | 82 | +    ec->encoder = encoder; | 
|  | 83 | +    ec->invalid = invalid ? flb_strdup(invalid) : NULL; | 
|  | 84 | +    return ec; | 
|  | 85 | +} | 
|  | 86 | + | 
|  | 87 | + | 
|  | 88 | +int flb_encoding_decode(struct flb_encoding *ec, | 
|  | 89 | +                        char *str, size_t slen, | 
|  | 90 | +                        char **result, size_t *result_len)  | 
|  | 91 | +{ | 
|  | 92 | +    size_t outlen = 0; | 
|  | 93 | +    char *outbuf; | 
|  | 94 | +    int ret; | 
|  | 95 | + | 
|  | 96 | +    *result = NULL; | 
|  | 97 | +    *result_len = 0; | 
|  | 98 | + | 
|  | 99 | +    if (slen == 0) { | 
|  | 100 | +        *result = flb_strdup(""); | 
|  | 101 | +        *result_len = 0; | 
|  | 102 | +        return FLB_ENCODING_SUCCESS; | 
|  | 103 | +    } | 
|  | 104 | +     | 
|  | 105 | +    ret = tutf8e_encoder_buffer_length(ec->encoder, str, ec->invalid,  slen, &outlen); | 
|  | 106 | +     | 
|  | 107 | +    if (ret != TUTF8E_OK) { | 
|  | 108 | +        return FLB_ENCODING_FAILURE; | 
|  | 109 | +    } | 
|  | 110 | +     | 
|  | 111 | + | 
|  | 112 | +    outbuf = flb_malloc(outlen + 1); | 
|  | 113 | +    if(outbuf == NULL) { | 
|  | 114 | +        flb_error("[flb_encoding] out of memory (%zu)", (int) outlen  + 1); | 
|  | 115 | +        return FLB_ENCODING_FAILURE; | 
|  | 116 | +    } | 
|  | 117 | + | 
|  | 118 | +    ret = tutf8e_encoder_buffer_encode(ec->encoder, str, slen, ec->invalid, outbuf, &outlen); | 
|  | 119 | +     | 
|  | 120 | +    if (ret != TUTF8E_OK) { | 
|  | 121 | +        flb_free(outbuf); | 
|  | 122 | +        return FLB_ENCODING_FAILURE; | 
|  | 123 | +    } | 
|  | 124 | +    outbuf[outlen] = 0; | 
|  | 125 | +    *result = outbuf; | 
|  | 126 | +    *result_len = outlen; | 
|  | 127 | +     | 
|  | 128 | +    return FLB_ENCODING_SUCCESS; | 
|  | 129 | +} | 
|  | 130 | +     | 
|  | 131 | +void flb_encoding_close(struct flb_encoding *ec) { | 
|  | 132 | +    if (ec) { | 
|  | 133 | +        if (ec->invalid) { | 
|  | 134 | +            flb_free((char*)ec->invalid); | 
|  | 135 | +        } | 
|  | 136 | +    } | 
|  | 137 | +} | 
|  | 138 | + | 
0 commit comments