Skip to content

[CDRIVER-5983] Refactor String Handling Around URI Parsing #2047

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 38 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
461d468
`mlib/str.h` - String utilities
vector-of-bool May 1, 2025
ad481cc
Modifications and extensions of `write_concern`
vector-of-bool May 6, 2025
1f73472
Fix docs builds with older Sphinx 7.1
vector-of-bool May 6, 2025
764f439
Error utilities for clearing/reseting an error obj
vector-of-bool May 6, 2025
eb549b5
A more robust integer parsing function
vector-of-bool May 6, 2025
d353579
"because" assertions
vector-of-bool May 16, 2025
70ea5ed
Merge branch 'master' into CDRIVER-5983-uri-param-refactor
vector-of-bool Jun 16, 2025
97c35c3
str_split_around algorithm
vector-of-bool Jun 17, 2025
8013f27
`find_first_of` algorithm
vector-of-bool Jun 17, 2025
842dfbf
`str_contains(_any_of)` algo
vector-of-bool Jun 18, 2025
e7160fb
Rename `mlib_str...` to `mstr...`
vector-of-bool Jun 19, 2025
fc6bdd6
Support negative indexing
vector-of-bool Jun 19, 2025
11bb078
Case normalization in mlib/str
vector-of-bool Jun 25, 2025
2f6da2d
Use sized strings and algos throughout URI parsing
vector-of-bool Jun 25, 2025
aba486e
Integer parsing using sized strings
vector-of-bool Jun 26, 2025
33d8126
Refactor %-encoding and integer parsing
vector-of-bool Jun 26, 2025
4b96578
Allow passing an error string to reformat itself
vector-of-bool Jun 26, 2025
3ebe7f8
Missing `inline` spec
vector-of-bool Jun 26, 2025
705680a
Unused expr warnings
vector-of-bool Jun 26, 2025
9eb3503
misc goofs
vector-of-bool Jun 26, 2025
818096e
Fix missing-init warning
vector-of-bool Jun 26, 2025
d0d7e09
Tweaked error message for maxstalenessseconds
vector-of-bool Jun 26, 2025
9d71a8f
uninit warnings
vector-of-bool Jun 26, 2025
0f67e11
Tweak logging behavior around URI parse errors
vector-of-bool Jun 26, 2025
55cab60
uninit vars
vector-of-bool Jun 26, 2025
02657a7
Merge branch 'master' into CDRIVER-5983-uri-param-refactor
vector-of-bool Jun 26, 2025
058a2f2
Simplify parsing of host specifiers
vector-of-bool Jun 26, 2025
c894c20
Use formatting shorthand macro
vector-of-bool Jun 26, 2025
7eeeda3
Merge branch 'master' into CDRIVER-5983-uri-param-refactor
vector-of-bool Jun 26, 2025
727e3a7
Minor formatting
vector-of-bool Jun 26, 2025
f0952cc
[fixup] error message for ipv6 parsing
vector-of-bool Jun 26, 2025
5044dd7
Remove stray newline in test URI string
vector-of-bool Jun 26, 2025
6f53c7a
Fixup `bson_set_error` to use a temp string buffer
vector-of-bool Jun 26, 2025
a5f06e1
Free the wtag on setting it
vector-of-bool Jun 26, 2025
565d167
Fix integer boundary condition around INT64_MIN
vector-of-bool Jun 26, 2025
7416c4f
Minor tweaks and cleanup
vector-of-bool Jun 27, 2025
eede34c
Rename `mlib_cstring` and `mstr_substr`
vector-of-bool Jun 27, 2025
fe504f1
Revert "Modifications and extensions of `write_concern`"
vector-of-bool Jun 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions build/sphinx/mongoc_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from sphinx.application import Sphinx
from sphinx.application import logger as sphinx_log

try:
from sphinx.builders.dirhtml import DirectoryHTMLBuilder
except ImportError:
Expand All @@ -16,7 +17,8 @@
from sphinx.config import Config
from docutils.parsers.rst import Directive

needs_sphinx = "1.7" # Do not require newer sphinx. EPEL packages build man pages with Sphinx 1.7.6. Refer: CDRIVER-4767
# Do not require newer sphinx. EPEL packages build man pages with Sphinx 1.7.6. Refer: CDRIVER-4767
needs_sphinx = "1.7"
author = "MongoDB, Inc"

# -- Options for HTML output ----------------------------------------------
Expand All @@ -38,7 +40,8 @@ def _file_man_page_name(fpath: Path) -> Union[str, None]:
continue
return mat[1]

def _collect_man (app: Sphinx):

def _collect_man(app: Sphinx):
# Note: 'app' is partially-formed, as this is called from the Sphinx.__init__
docdir = Path(app.srcdir)
# Find everything:
Expand All @@ -61,6 +64,7 @@ def _collect_man (app: Sphinx):
assert docname, filepath
man_pages.append((docname, man_name, "", [author], 3))


# -- Options for manual page output ---------------------------------------

# NOTE: This starts empty, but we populate it in `setup` in _collect_man() (see above)
Expand Down Expand Up @@ -168,6 +172,7 @@ def generate_html_redirs(app: Sphinx, page: str, templatename: str, context: Dic
builder.css_files[:] = prev_css
sphinx_log.debug("Wrote redirect: %r -> %r", path, page)


def mongoc_common_setup(app: Sphinx):
_collect_man(app)
app.connect("html-page-context", generate_html_redirs)
Expand Down
184 changes: 184 additions & 0 deletions src/common/src/mlib/intencode.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@

#include <mlib/config.h>
#include <mlib/loop.h>
#include <mlib/str.h>

#include <errno.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>

Expand Down Expand Up @@ -165,3 +168,184 @@ mlib_write_f64le (void *out, double d)
memcpy (&bits, &d, sizeof d);
return mlib_write_u64le (out, bits);
}

/**
 * @brief Parse a string of digits as an unsigned 64-bit integer
 *
 * @param in The digits to be parsed. No sign and no base prefix is accepted:
 * such characters are rejected like any other invalid digit.
 * @param base The numeric base of the digits. Must not be zero!
 * @param out Optional pointer that receives the decoded value on success. It
 * is not written to if the parse fails.
 * @return int Zero on success. `EINVAL` if the string is empty or contains a
 * character that is not a digit in `base`. `ERANGE` if every character is a
 * valid digit but the value does not fit in a `uint64_t`.
 *
 * See `mlib_i64_parse` for more details.
 */
static inline int
mlib_nat64_parse (mstr_view in, int base, uint64_t *out)
{
   // There must be at least one digit
   if (!in.len) {
      return EINVAL;
   }

   // Running total of the digits consumed so far
   uint64_t acc = 0;
   // Sticky flag: set once any step overflows. We keep scanning afterwards so
   // that a later invalid digit is still reported as EINVAL rather than ERANGE.
   bool overflowed = false;

   // Consume one character per iteration from the front of the view
   for (; in.len; in = mstr_substr (in, 1)) {
      // Fold the case so that 'A' and 'a' denote the same digit
      const int32_t ch = mlib_latin_tolower (in.data[0]);
      int place;
      if (ch >= '0' && ch <= '9') {
         // Decimal digit
         place = ch - '0';
      } else if (ch >= 'a' && ch <= 'z') {
         // Letter digit: 'a' is ten, 'b' is eleven, ...
         place = 10 + (ch - 'a');
      } else {
         // Not an alphanumeric character at all
         return EINVAL;
      }
      if (place >= base) {
         // A digit, but too large for the requested base
         return EINVAL;
      }
      // Make room for the new digit, then add it in
      if (mlib_mul (&acc, base)) {
         overflowed = true;
      }
      if (mlib_add (&acc, place)) {
         overflowed = true;
      }
   }

   if (overflowed) {
      return ERANGE;
   }

   if (out) {
      *out = acc;
   }
   return 0;
}

/**
 * @brief Parse a string as a 64-bit signed integer
 *
 * @param in The string of digits to be parsed. May begin with a single "+" or
 * "-" sign.
 * @param base Optional: The base to use for parsing. Use "0" to infer the base
 * from a prefix: "0x" is hexadecimal, "0o" is octal, "0b" is binary, any other
 * leading "0" is octal, and anything else is decimal. Prefix letters are
 * case-insensitive. Prefixes are only recognized when `base` is zero.
 * @param out Optional storage for an int64 value to be updated with the result
 * @return int Returns an errno value for the parse
 *
 * - A value of `0` indicates that the parse was successful.
 * - A value of `EINVAL` indicates that the input string is not a valid
 *   representation of an integer.
 * - A value of `ERANGE` indicates that the input string is a valid integer,
 *   but the actual encoded value cannot be represented in an `int64_t`
 * - If the parse fails (returns non-zero), then the value at `*out` will remain
 *   unmodified.
 *
 * This differs from `strtoll` in that it requires that the entire string be
 * parsed as a valid integer. If parsing stops early, then the result will indicate
 * an error of EINVAL.
 */
static inline int
mlib_i64_parse (mstr_view in, int base, int64_t *out)
{
   if (in.len == 0) {
      // Empty string is not a valid integer
      return EINVAL;
   }
   // Parse the possible sign prefix
   int sign = 1;
   // Check for a "+"
   if (in.data[0] == '+') {
      // Just a plus. Drop it and do nothing with it.
      in = mstr_substr (in, 1);
   }
   // Check for a negative prefix
   else if (in.data[0] == '-') {
      // Negative sign. We'll negate the value later.
      in = mstr_substr (in, 1);
      sign = -1;
   }

   if (in.len == 0) {
      // The string was only a sign with no digits. Stop here: a view is not
      // required to be null-terminated, so inspecting `in.data[0]` for a base
      // prefix below would read past the end of the string.
      return EINVAL;
   }

   // Infer the base value, if we have one
   if (base == 0) {
      if (in.data[0] == '0') {
         if (in.len > 1) {
            const int32_t marker = mlib_latin_tolower (in.data[1]);
            if (marker == 'x') {
               // Hexadecimal
               base = 16;
               in = mstr_substr (in, 2);
            } else if (marker == 'o') {
               // Octal
               base = 8;
               in = mstr_substr (in, 2);
            } else if (marker == 'b') {
               // Binary
               base = 2;
               in = mstr_substr (in, 2);
            }
         }
         if (base == 0) {
            // Other: Octal with a single "0" prefix. Don't trim this, because
            // it may be a literal "0"
            base = 8;
         }
      } else {
         // No '0' prefix. Treat it as decimal
         base = 10;
      }
   }

   // Try to parse the natural number now that we have removed all prefixes and
   // have a non-zero base. If a prefix such as "0x" consumed the whole string,
   // the remaining empty view is rejected here with EINVAL.
   uint64_t nat;
   int rc = mlib_nat64_parse (in, base, &nat);
   if (rc) {
      return rc;
   }

   // Try to narrow from the u64 to i64 and apply the sign. This must be done as
   // one operation because of the pathological case of parsing INT64_MIN, whose
   // magnitude is not representable as a positive int64.
   int64_t i64 = 0;
   if (mlib_mul (&i64, nat, sign)) {
      return ERANGE;
   }

   // `out` is optional: only store the result if the caller asked for it
   (void) (out && (*out = i64));
   return 0;
}

// Convenience overloads of `mlib_i64_parse`, selected through MLIB_ARGC_PICK
// (defined elsewhere; presumably it dispatches to `_mlib_i64_parse_argc_N`
// according to the number of arguments given -- confirm against its definition):
//
// - `mlib_i64_parse (S, Ptr)` passes a base of `0`, i.e. the base is inferred.
// - `mlib_i64_parse (S, Base, Ptr)` uses the given base.
//
// In both forms `S` is passed through `mstr_view_from` before the function of
// the same name is called.
#define mlib_i64_parse(...) MLIB_ARGC_PICK (_mlib_i64_parse, __VA_ARGS__)
#define _mlib_i64_parse_argc_2(S, Ptr) _mlib_i64_parse_argc_3 ((S), 0, (Ptr))
#define _mlib_i64_parse_argc_3(S, Base, Ptr) mlib_i64_parse (mstr_view_from ((S)), Base, Ptr)

/**
 * @brief Parse a string as a 32-bit signed integer
 *
 * @param in The string of digits to be parsed
 * @param base The base to use for parsing, forwarded to `mlib_i64_parse`
 * @param out Optional storage for an int32 value to be updated with the result.
 * It is left untouched if the parse fails.
 * @return int Zero on success, the error from `mlib_i64_parse` if the string
 * could not be parsed as a 64-bit integer, or `ERANGE` if the parsed value does
 * not fit in an `int32_t`.
 *
 * The string is first parsed as a 64-bit integer and then narrowed. See
 * `mlib_i64_parse` for more details.
 */
static inline int
mlib_i32_parse (mstr_view in, int base, int32_t *out)
{
   // Do the real parsing at 64-bit width
   int64_t wide;
   const int rc = mlib_i64_parse (in, base, &wide);
   if (rc != 0) {
      // Not a valid int64, so it cannot be a valid int32 either
      return rc;
   }

   // Shrink to 32 bits, detecting values that do not fit
   int32_t narrowed = 0;
   if (mlib_narrow (&narrowed, wide)) {
      return ERANGE;
   }

   // Only store the result if the caller supplied somewhere to put it
   if (out) {
      *out = narrowed;
   }
   return 0;
}

// Convenience overloads of `mlib_i32_parse`, selected through MLIB_ARGC_PICK
// (defined elsewhere; presumably it dispatches to `_mlib_i32_parse_argc_N`
// according to the number of arguments given -- confirm against its definition):
//
// - `mlib_i32_parse (S, Ptr)` passes a base of `0` to the function.
// - `mlib_i32_parse (S, Base, Ptr)` uses the given base.
//
// In both forms `S` is passed through `mstr_view_from` before the function of
// the same name is called.
#define mlib_i32_parse(...) MLIB_ARGC_PICK (_mlib_i32_parse, __VA_ARGS__)
#define _mlib_i32_parse_argc_2(S, Ptr) _mlib_i32_parse_argc_3 ((S), 0, (Ptr))
#define _mlib_i32_parse_argc_3(S, Base, Ptr) mlib_i32_parse (mstr_view_from ((S)), Base, Ptr)
Loading