Skip to content

Commit 25ac200

Browse files
authored
RUST-1992 Introduce the &CStr and CString types for keys and regular expressions (#563)
1 parent 95774f9 commit 25ac200

33 files changed

+614
-348
lines changed

fuzz/generate_corpus.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use bson::{doc, Bson, Decimal128};
1+
use bson::{cstr, doc, Bson, Decimal128};
22
use std::{
33
fs,
44
io::{Error, ErrorKind},
@@ -64,7 +64,7 @@ fn generate_type_marker_cases(dir: &Path) -> std::io::Result<()> {
6464
"bool": true,
6565
"date": bson::DateTime::now(),
6666
"null": Bson::Null,
67-
"regex": Bson::RegularExpression(bson::Regex { pattern: "pattern".into(), options: "i".into() }),
67+
"regex": Bson::RegularExpression(bson::Regex { pattern: cstr!("pattern").into(), options: cstr!("i").into() }),
6868
"int32": 123i32,
6969
"timestamp": bson::Timestamp { time: 12345, increment: 1 },
7070
"int64": 123i64,

serde-tests/json.rs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use serde_json::json;
33

44
use super::AllTypes;
55

6-
use bson::{doc, Bson, JavaScriptCodeWithScope, RawArrayBuf, RawBson, RawDocumentBuf};
6+
use bson::{cstr, doc, Bson, JavaScriptCodeWithScope, RawArrayBuf, RawBson, RawDocumentBuf};
77

88
use serde::{Deserialize, Serialize};
99

@@ -99,18 +99,18 @@ fn owned_raw_bson() {
9999
});
100100

101101
let mut doc_buf = RawDocumentBuf::new();
102-
doc_buf.append("a", "key").unwrap();
103-
doc_buf.append("number", 12).unwrap();
104-
doc_buf.append("bool", false).unwrap();
105-
doc_buf.append("nu", RawBson::Null).unwrap();
102+
doc_buf.append(cstr!("a"), "key");
103+
doc_buf.append(cstr!("number"), 12);
104+
doc_buf.append(cstr!("bool"), false);
105+
doc_buf.append(cstr!("nu"), RawBson::Null);
106106

107107
let mut array_buf = RawArrayBuf::new();
108-
array_buf.push(1).unwrap();
109-
array_buf.push("string").unwrap();
108+
array_buf.push(1);
109+
array_buf.push("string");
110110

111111
let mut bson_doc = RawDocumentBuf::new();
112-
bson_doc.append("first", true).unwrap();
113-
bson_doc.append("second", "string").unwrap();
112+
bson_doc.append(cstr!("first"), true);
113+
bson_doc.append(cstr!("second"), "string");
114114

115115
let expected = Foo {
116116
doc_buf,

serde-tests/test.rs

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use std::{
1818
};
1919

2020
use bson::{
21+
cstr,
2122
doc,
2223
oid::ObjectId,
2324
spec::BinarySubtype,
@@ -835,8 +836,8 @@ fn raw_regex() {
835836

836837
let bytes = bson::serialize_to_vec(&doc! {
837838
"r": Regex {
838-
pattern: "a[b-c]d".to_string(),
839-
options: "ab".to_string(),
839+
pattern: cstr!("a[b-c]d").into(),
840+
options: cstr!("ab").into(),
840841
},
841842
})
842843
.expect("raw_regex");
@@ -927,8 +928,8 @@ impl AllTypes {
927928
};
928929
let date = DateTime::now();
929930
let regex = Regex {
930-
pattern: "hello".to_string(),
931-
options: "x".to_string(),
931+
pattern: cstr!("hello").into(),
932+
options: cstr!("x").into(),
932933
};
933934
let timestamp = Timestamp {
934935
time: 123,
@@ -1058,8 +1059,8 @@ fn all_raw_types_rmp() {
10581059
scope: doc! { "x": 1 },
10591060
},
10601061
"regex": Regex {
1061-
pattern: "pattern".to_string(),
1062-
options: "opt".to_string()
1062+
pattern: cstr!("pattern").into(),
1063+
options: cstr!("opt").into()
10631064
}
10641065
})
10651066
.unwrap();
@@ -1254,24 +1255,22 @@ fn owned_raw_types() {
12541255

12551256
let f = Foo {
12561257
subdoc: RawDocumentBuf::from_iter([
1257-
("a key", RawBson::String("a value".to_string())),
1258-
("an objectid", RawBson::ObjectId(oid)),
1259-
("a date", RawBson::DateTime(dt)),
1258+
(cstr!("a key"), RawBson::String("a value".to_string())),
1259+
(cstr!("an objectid"), RawBson::ObjectId(oid)),
1260+
(cstr!("a date"), RawBson::DateTime(dt)),
12601261
(
1261-
"code_w_scope",
1262+
cstr!("code_w_scope"),
12621263
RawBson::JavaScriptCodeWithScope(raw_code_w_scope.clone()),
12631264
),
1264-
("decimal128", RawBson::Decimal128(d128)),
1265-
])
1266-
.unwrap(),
1265+
(cstr!("decimal128"), RawBson::Decimal128(d128)),
1266+
]),
12671267
array: RawArrayBuf::from_iter([
12681268
RawBson::String("a string".to_string()),
12691269
RawBson::ObjectId(oid),
12701270
RawBson::DateTime(dt),
12711271
RawBson::JavaScriptCodeWithScope(raw_code_w_scope),
12721272
RawBson::Decimal128(d128),
1273-
])
1274-
.unwrap(),
1273+
]),
12751274
};
12761275

12771276
let expected = doc! {

src/bson.rs

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ use std::{
3131
use serde_json::{json, Value};
3232

3333
pub use crate::document::Document;
34-
use crate::{base64, oid, spec::ElementType, Binary, Decimal128};
34+
use crate::{base64, oid, raw::CString, spec::ElementType, Binary, Decimal128};
3535

3636
/// Possible BSON value types.
3737
#[derive(Clone, Default, PartialEq)]
@@ -268,6 +268,12 @@ impl From<String> for Bson {
268268
}
269269
}
270270

271+
impl From<crate::raw::CString> for Bson {
272+
fn from(a: crate::raw::CString) -> Bson {
273+
Bson::String(a.into_string())
274+
}
275+
}
276+
271277
impl From<Document> for Bson {
272278
fn from(a: Document) -> Bson {
273279
Bson::Document(a)
@@ -480,14 +486,14 @@ impl Bson {
480486
Bson::Boolean(v) => json!(v),
481487
Bson::Null => Value::Null,
482488
Bson::RegularExpression(Regex { pattern, options }) => {
483-
let mut chars: Vec<_> = options.chars().collect();
489+
let mut chars: Vec<_> = options.as_str().chars().collect();
484490
chars.sort_unstable();
485491

486492
let options: String = chars.into_iter().collect();
487493

488494
json!({
489495
"$regularExpression": {
490-
"pattern": pattern,
496+
"pattern": pattern.into_string(),
491497
"options": options,
492498
}
493499
})
@@ -619,7 +625,7 @@ impl Bson {
619625
ref pattern,
620626
ref options,
621627
}) => {
622-
let mut chars: Vec<_> = options.chars().collect();
628+
let mut chars: Vec<_> = options.as_str().chars().collect();
623629
chars.sort_unstable();
624630

625631
let options: String = chars.into_iter().collect();
@@ -842,7 +848,9 @@ impl Bson {
842848
if let Ok(regex) = doc.get_document("$regularExpression") {
843849
if let Ok(pattern) = regex.get_str("pattern") {
844850
if let Ok(options) = regex.get_str("options") {
845-
return Bson::RegularExpression(Regex::new(pattern, options));
851+
if let Ok(regex) = Regex::from_strings(pattern, options) {
852+
return Bson::RegularExpression(regex);
853+
}
846854
}
847855
}
848856
}
@@ -1147,7 +1155,7 @@ impl Timestamp {
11471155
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
11481156
pub struct Regex {
11491157
/// The regex pattern to match.
1150-
pub pattern: String,
1158+
pub pattern: CString,
11511159

11521160
/// The options for the regex.
11531161
///
@@ -1156,18 +1164,22 @@ pub struct Regex {
11561164
/// multiline matching, 'x' for verbose mode, 'l' to make \w, \W, etc. locale dependent,
11571165
/// 's' for dotall mode ('.' matches everything), and 'u' to make \w, \W, etc. match
11581166
/// unicode.
1159-
pub options: String,
1167+
pub options: CString,
11601168
}
11611169

11621170
impl Regex {
1163-
pub(crate) fn new(pattern: impl AsRef<str>, options: impl AsRef<str>) -> Self {
1171+
#[cfg(any(test, feature = "serde"))]
1172+
pub(crate) fn from_strings(
1173+
pattern: impl AsRef<str>,
1174+
options: impl AsRef<str>,
1175+
) -> crate::error::Result<Self> {
11641176
let mut chars: Vec<_> = options.as_ref().chars().collect();
11651177
chars.sort_unstable();
11661178
let options: String = chars.into_iter().collect();
1167-
Self {
1168-
pattern: pattern.as_ref().to_string(),
1169-
options,
1170-
}
1179+
Ok(Self {
1180+
pattern: pattern.as_ref().to_string().try_into()?,
1181+
options: options.try_into()?,
1182+
})
11711183
}
11721184
}
11731185

src/de/raw.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1306,15 +1306,15 @@ impl<'de> serde::de::Deserializer<'de> for &mut RegexAccess<'de> {
13061306
RegexDeserializationStage::Pattern => {
13071307
self.stage = RegexDeserializationStage::Options;
13081308
match &self.re {
1309-
BsonCow::Borrowed(re) => visitor.visit_borrowed_str(re.pattern),
1310-
BsonCow::Owned(re) => visitor.visit_str(&re.pattern),
1309+
BsonCow::Borrowed(re) => visitor.visit_borrowed_str(re.pattern.as_str()),
1310+
BsonCow::Owned(re) => visitor.visit_str(re.pattern.as_str()),
13111311
}
13121312
}
13131313
RegexDeserializationStage::Options => {
13141314
self.stage = RegexDeserializationStage::Done;
13151315
match &self.re {
1316-
BsonCow::Borrowed(re) => visitor.visit_borrowed_str(re.options),
1317-
BsonCow::Owned(re) => visitor.visit_str(&re.options),
1316+
BsonCow::Borrowed(re) => visitor.visit_borrowed_str(re.options.as_str()),
1317+
BsonCow::Owned(re) => visitor.visit_str(re.options.as_str()),
13181318
}
13191319
}
13201320
RegexDeserializationStage::Done => {

src/de/serde.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -442,7 +442,10 @@ impl<'de> Visitor<'de> for BsonVisitor {
442442

443443
"$regularExpression" => {
444444
let re = visitor.next_value::<extjson::models::RegexBody>()?;
445-
return Ok(Bson::RegularExpression(Regex::new(re.pattern, re.options)));
445+
return Ok(Bson::RegularExpression(
446+
Regex::from_strings(re.pattern, re.options)
447+
.map_err(serde::de::Error::custom)?,
448+
));
446449
}
447450

448451
"$dbPointer" => {

src/extjson/de.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ impl TryFrom<serde_json::Map<String, serde_json::Value>> for Bson {
5555

5656
if obj.contains_key("$regularExpression") {
5757
let regex: models::Regex = serde_json::from_value(obj.into())?;
58-
return Ok(regex.parse().into());
58+
return Ok(regex.parse()?.into());
5959
}
6060

6161
if obj.contains_key("$numberInt") {

src/extjson/models.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,8 @@ pub(crate) struct RegexBody {
122122
}
123123

124124
impl Regex {
125-
pub(crate) fn parse(self) -> crate::Regex {
126-
crate::Regex::new(self.body.pattern, self.body.options)
125+
pub(crate) fn parse(self) -> crate::error::Result<crate::Regex> {
126+
crate::Regex::from_strings(self.body.pattern, self.body.options)
127127
}
128128
}
129129

src/macros.rs

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -240,12 +240,12 @@ macro_rules! rawbson {
240240

241241
// Finished with trailing comma.
242242
(@array [$($elems:expr,)*]) => {
243-
$crate::RawArrayBuf::from_iter(vec![$($elems,)*]).expect("invalid bson value")
243+
$crate::RawArrayBuf::from_iter(vec![$($elems,)*])
244244
};
245245

246246
// Finished without trailing comma.
247247
(@array [$($elems:expr),*]) => {
248-
$crate::RawArrayBuf::from_iter(vec![$($elems),*]).expect("invalid bson value")
248+
$crate::RawArrayBuf::from_iter(vec![$($elems),*])
249249
};
250250

251251
// Next element is `null`.
@@ -291,15 +291,26 @@ macro_rules! rawbson {
291291
// Finished.
292292
(@object $object:ident () () ()) => {};
293293

294-
// Insert the current entry followed by trailing comma.
295-
(@object $object:ident [$($key:tt)+] ($value:expr) , $($rest:tt)*) => {
296-
$object.append(($($key)+), $value).expect("invalid bson value");
294+
// Insert the current entry followed by a trailing comma, with a key literal.
295+
(@object $object:ident [$key:literal] ($value:expr) , $($rest:tt)*) => {{
296+
$object.append($crate::raw::cstr!($key), $value);
297+
$crate::rawbson!(@object $object () ($($rest)*) ($($rest)*));
298+
}};
299+
300+
// Insert the current entry followed by a trailing comma, with a key expression.
301+
(@object $object:ident [$($key:tt)+] ($value:expr) , $($rest:tt)*) => {{
302+
$object.append($($key)+, $value);
297303
$crate::rawbson!(@object $object () ($($rest)*) ($($rest)*));
304+
}};
305+
306+
// Insert the last entry without trailing comma, with a key literal.
307+
(@object $object:ident [$key:literal] ($value:expr)) => {
308+
$object.append($crate::raw::cstr!($key), $value);
298309
};
299310

300-
// Insert the last entry without trailing comma.
311+
// Insert the last entry without trailing comma, with a key expression.
301312
(@object $object:ident [$($key:tt)+] ($value:expr)) => {
302-
$object.append(($($key)+), $value).expect("invalid bson value");
313+
$object.append($($key)+, $value);
303314
};
304315

305316
// Next value is `null`.

src/raw.rs

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ mod array;
116116
mod array_buf;
117117
mod bson;
118118
mod bson_ref;
119+
mod cstr;
119120
mod document;
120121
mod document_buf;
121122
mod iter;
@@ -142,6 +143,7 @@ pub use self::{
142143
RawJavaScriptCodeWithScopeRef,
143144
RawRegexRef,
144145
},
146+
cstr::{assert_valid_cstr, cstr, validate_cstr, CStr, CString, IsValidCStr},
145147
document::RawDocument,
146148
document_buf::{BindRawBsonRef, BindValue, RawDocumentBuf},
147149
iter::{RawElement, RawIter},
@@ -316,15 +318,3 @@ pub(crate) fn write_string(buf: &mut Vec<u8>, s: &str) {
316318
buf.extend(s.as_bytes());
317319
buf.push(0);
318320
}
319-
320-
pub(crate) fn write_cstring(buf: &mut Vec<u8>, s: &str) -> Result<()> {
321-
if s.contains('\0') {
322-
return Err(Error::malformed_bytes(format!(
323-
"cstring with interior null: {:?}",
324-
s
325-
)));
326-
}
327-
buf.extend(s.as_bytes());
328-
buf.push(0);
329-
Ok(())
330-
}

0 commit comments

Comments
 (0)