|
36 | 36 |
|
37 | 37 | /* |
38 | 38 | * |
39 | | - * flb_encoding_open(encoding): |
40 | | - * iso-8859-1,... |
41 | | - * windows-1251 windows-1252, .. |
| 39 | + * flb_encoding_open(encoding,replacement): |
| 40 | + * |
| 41 | + * encoding: |
| 42 | + * iso-8859-1,... |
| 43 | + * windows-1251 windows-1252, .. |
| 44 | + * |
| 45 | + * replacement: |
| 46 | + * \R use the Unicode replacement character U+FFFD (default) |
| 47 | + * \I ignore/skip bad chars |
| 48 | + * \E fail if bad chars |
| 49 | + * <str> any other string is used as-is as the replacement |
42 | 50 | * |
43 | | - * <charset> - fail if bad chars |
44 | | - * <charset>//IGNORE - ignore bad chars |
45 | | - * <charset>//REPLACEMENT //R - use unicode replacement chars for bad chars |
46 | | - * <charset>//QUESTION //Q - use '?' for bad chars |
47 | | - * <charset>///<str> - use <str> for bad chars |
48 | 51 | */ |
49 | 52 |
|
50 | 53 |
|
static unsigned char replacement_utf8[] = { 0xEF, 0xBF, 0xBD , 0 };

/*
 * parse_replacement(replacement):
 *
 * Map the user-supplied replacement option to the string that is
 * substituted for characters that cannot be converted.
 *
 *   NULL or "\R"  -> UTF-8 bytes of U+FFFD (replacement_utf8)
 *   "\I"          -> ""  (bad characters are skipped)
 *   "\?"          -> "?"
 *   "\E"          -> NULL (no replacement; the option list documents
 *                    this as "fail if bad chars")
 *   anything else -> the option string itself, returned unmodified
 *
 * The result may point at a string literal, at the static
 * replacement_utf8 buffer, or at the caller's own string, so it is
 * returned as a pointer to const and must not be written through or
 * freed. When the option string itself is returned, the caller has to
 * keep `replacement` alive for as long as the result is in use.
 */
static const char *parse_replacement(const char *replacement) {

    if (replacement == NULL || strcmp(replacement, "\\R") == 0) {
        /* the buffer is unsigned char[]; convert explicitly */
        return (const char *) replacement_utf8;
    }
    if (strcmp(replacement, "\\I") == 0) {
        return "";
    }
    if (strcmp(replacement, "\\?") == 0) {
        return "?";
    }
    if (strcmp(replacement, "\\E") == 0) {
        return NULL;
    }

    return replacement;
}
| 77 | + |
| 78 | +struct flb_encoding *flb_encoding_open(const char *encoding, const char *replacement) { |
54 | 79 | struct flb_encoding *ec; |
55 | 80 | TUTF8encoder encoder; |
56 | 81 | const char *invalid; |
57 | 82 | char *opt; |
58 | | - |
59 | | - if ((opt = strstr(encoding,"//")) != NULL) { |
60 | | - *opt = 0; |
61 | | - opt += 2; |
62 | | - if (*opt == '/') { |
63 | | - invalid = opt + 1; |
64 | | - } |
65 | | - else if (!strcmp(opt,"I") || !strcmp(opt,"IGNORE")) { |
66 | | - invalid = ""; |
67 | | - } |
68 | | - else if (!strcmp(opt,"R") || !strcmp(opt,"REPLACEMENT")) { |
69 | | - invalid = (const char *) replacement_utf8; |
70 | | - } |
71 | | - else if (!strcmp(opt,"Q") || !strcmp(opt,"QUESTION")) { |
72 | | - invalid = "?"; |
73 | | - } |
74 | | - else { |
75 | | - flb_error("[flb_encoding] unknown encodig option: %s", opt); |
76 | | - return NULL; |
77 | | - } |
78 | | - } |
79 | | - else { |
80 | | - invalid = NULL; |
81 | | - } |
82 | 83 |
|
83 | 84 | if ((encoder = tutf8e_encoder(encoding)) == NULL) { |
84 | 85 | flb_error("[flb_encoding] unknown encoding: %s", encoding); |
85 | 86 | return NULL; |
86 | 87 | } |
| 88 | + |
| 89 | + invalid = parse_replacement(replacement); |
87 | 90 |
|
88 | 91 | ec = flb_calloc(sizeof(struct flb_encoding),1); |
89 | 92 |
|
|
0 commit comments