From a50dbe1586d37619f91fdfc6e506060510f885a7 Mon Sep 17 00:00:00 2001 From: Sergey Noskov Date: Tue, 20 Aug 2019 09:53:49 +0300 Subject: [PATCH 01/23] Basic tags detection support --- Cargo.toml | 1 - src/lib.rs | 2 +- src/parser.rs | 369 ++++++++++++++++++++++++++++++++++---------------- 3 files changed, 252 insertions(+), 120 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 86038cb..abfe87f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,4 +21,3 @@ num-traits="^0.2" [build-dependencies] capnpc = "^0.10" - diff --git a/src/lib.rs b/src/lib.rs index d614d0c..0cb4e1e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,5 @@ pub mod metric; +pub mod name; pub mod parser; pub use crate::metric::*; @@ -6,4 +7,3 @@ pub use crate::metric::*; pub mod protocol_capnp { include!(concat!(env!("OUT_DIR"), "/schema/protocol_capnp.rs")); } - diff --git a/src/parser.rs b/src/parser.rs index 66d06c4..01581a7 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -5,49 +5,74 @@ use std::str::from_utf8; use std::str::FromStr; use combine::byte::{byte, bytes, digit, newline}; -use combine::choice; use combine::combinator::{eof, skip_many}; use combine::error::{ParseError, StreamError}; use combine::parser::range::{recognize, take, take_until_range, take_while1}; -use combine::stream::{decode, RangeStream, StreamErrorFor, PointerOffset}; +use combine::stream::{decode, PointerOffset, RangeStream, StreamErrorFor}; +use combine::{choice, position}; use combine::{optional, skip_many1, Parser}; use bytes::{Bytes, BytesMut}; -use crate::metric::{Metric, MetricType, FromF64}; -use num_traits::{Float, AsPrimitive}; +use crate::metric::{FromF64, Metric, MetricType}; +use crate::name::MetricName; +use num_traits::{AsPrimitive, Float}; -// Used for returning parsing result +/// Used for returning parsing result #[derive(Debug)] pub enum ParsedPart where -F: Float + FromStr + Debug + AsPrimitive, + F: Float + FromStr + Debug + AsPrimitive, { - Metric((PointerOffset, PointerOffset), Metric), + Metric( + (PointerOffset, PointerOffset), + Option, + Metric, + ), Trash, TotalTrash, } +/// The parser signature may seem cryptic, but it can mostly be ignored totally and get used as +/// simple as +/// (TODO: code example) +/// +/// It's current goal is to be fast, use less allocs and to not depend on error and even probably input typing pub fn metric_stream_parser<'a, I, F>( max_unparsed: usize, - ) -> impl Parser, PartialState = impl Default + 'a> +) -> impl Parser, PartialState = impl Default + 'a> where -I: RangeStream + std::fmt::Debug, -I::Error: ParseError, -F: 'a + Float + Debug + FromStr + AsPrimitive + FromF64 + Sync, -::Err: std::error::Error + Sync + Send + 'static, + I: RangeStream + std::fmt::Debug, + I::Error: ParseError, + F: 'a + Float + Debug + FromStr + AsPrimitive + FromF64 + Sync, + ::Err: std::error::Error + Sync + Send + 'static, { - use combine::position; - let name = (position(), take_while1::(|c: u8| c != b':' && c != b'\n'), position()) + // to avoid allocations, we only find a position of a name in buffer + let name_with_tags = ( + position(), + take_while1::(|c: u8| c != b';' && c != b':' && c != b'\n'), + optional(( + byte(b';'), + position(), + take_while1::(|c: u8| c != b':' && c != b'\n'), + )), + position(), + ) .skip(byte(b':')) - .and_then(|(start, name, stop)| { - //let len =stop - start;// name.len(); - from_utf8(name) - .map_err(|_e| { - StreamErrorFor::::unexpected_static_message("name is not valid utf8") - }) - .map(|_| (start, stop)) - //TODO: introduce metric name limits, use is.alphabetical for each unicode char + .and_then(|(start, name, maybe_tag, stop)| { + //TODO: introduce metric name limits, use is.alphabetical for each unicode char + from_utf8(name).map_err(|_e| { + StreamErrorFor::::unexpected_static_message("name part is not valid utf8") + })?; + let tag_pos = if let Some((_, tag_pos, tag)) = maybe_tag { + from_utf8(tag).map_err(|_e| { + StreamErrorFor::::unexpected_static_message("tag part is not valid utf8") + })?; + Some(tag_pos) + } else { + None + }; + Ok::<_, StreamErrorFor>((start, tag_pos, stop)) }); let sign = byte(b'+').map(|_| 1i8).or(byte(b'-').map(|_| -1i8)); @@ -72,10 +97,10 @@ F: 'a + Float + Debug + FromStr + AsPrimitive + FromF64 + Sync, let unsigned_float = skip_many1(digit()) .and(optional((byte(b'.'), skip_many1(digit())))) .and(optional(( - byte(b'e'), - optional(byte(b'+').or(byte(b'-'))), - skip_many1(digit()), - ))); + byte(b'e'), + optional(byte(b'+').or(byte(b'-'))), + skip_many1(digit()), + ))); let sampling = (bytes(b"|@"), recognize(unsigned_float)).and_then(|(_, val)| { // TODO replace from_utf8 with handmade parser removing recognize @@ -89,12 +114,12 @@ F: 'a + Float + Debug + FromStr + AsPrimitive + FromF64 + Sync, val, mtype, choice(( - sampling.map(|v| Some(v)), - skip_many(newline()).map(|_| None), - eof().map(|_| None), - //skip_many(newline()).map(|_| None), - )), - ) + sampling.map(|v| Some(v)), + skip_many(newline()).map(|_| None), + eof().map(|_| None), + //skip_many(newline()).map(|_| None), + )), + ) .map(|(sign, mut val, mtype, sampling)| { let mtype = if let MetricType::Gauge(_) = mtype { MetricType::Gauge(sign) @@ -111,14 +136,18 @@ F: 'a + Float + Debug + FromStr + AsPrimitive + FromF64 + Sync, // here's what we are trying to parse choice(( - // valid metric - (skip_many(newline()), name, metric, skip_many(newline())).map(|(_, n, m, _)| ParsedPart::Metric(n, m)), - // trash ending with \n - //take_until_byte(b'\n').map(|_| ParsedPart::Trash), - (take_until_range(&b"\n"[..]), skip_many1(newline())).map(|_| ParsedPart::Trash), - // trash not ending with \n, but too long to be metric - take(max_unparsed).map(|_| ParsedPart::TotalTrash), - )) + // valid metric with (probably) tags + ( + skip_many(newline()), + name_with_tags, + metric, + skip_many(newline()), + ) + .map(|(_, (start, tag, stop), m, _)| ParsedPart::Metric((start, stop), tag, m)), + (take_until_range(&b"\n"[..]), skip_many1(newline())).map(|_| ParsedPart::Trash), + // trash not ending with \n, but too long to be metric + take(max_unparsed).map(|_| ParsedPart::TotalTrash), + )) } #[allow(unused_variables)] @@ -141,7 +170,8 @@ pub struct MetricParser<'a, F, E: ParseErrorHandler> { } impl<'a, F, E> MetricParser<'a, F, E> -where E: ParseErrorHandler +where + E: ParseErrorHandler, { pub fn new(input: &'a mut BytesMut, max_unparsed: usize, handler: E) -> Self { Self { @@ -154,13 +184,13 @@ where E: ParseErrorHandler } } -impl<'a, F,E> Iterator for MetricParser<'a, F, E> +impl<'a, F, E> Iterator for MetricParser<'a, F, E> where -E: ParseErrorHandler, -F: Float + FromStr + AsPrimitive + FromF64 + Debug + Sync , -::Err: std::error::Error + Sync + Send + 'static, + E: ParseErrorHandler, + F: Float + FromStr + AsPrimitive + FromF64 + Debug + Sync, + ::Err: std::error::Error + Sync + Send + 'static, { - type Item = (Bytes, Metric); + type Item = (MetricName, Metric); fn next(&mut self) -> Option { loop { if self.skip >= self.input.len() { @@ -180,7 +210,7 @@ F: Float + FromStr + AsPrimitive + FromF64 + Debug + Sync , parser, combine::easy::Stream(input), &mut Default::default(), - ); + ); res }; @@ -190,19 +220,22 @@ F: Float + FromStr + AsPrimitive + FromF64 + Debug + Sync , // end the iteration to get the buffer filled return None; } - Ok((Some(ParsedPart::Metric(namepos, metric)), consumed)) => { + Ok((Some(ParsedPart::Metric(name_pos, tag_pos, metric)), consumed)) => { // at this point our input buffer looks like this // [bad_data][useless_data][name][metric] + // we consider a metric name WITH tags as a "name" here // self.skip point to end of bad_data // `consumed` contains length ot all valid data (from useless to metric end) - // namepos.0 points at the start of name - // namepos.1 points at the end of name + // name_pos.0 points at the start of name + // name_pos.1 points at the end of name // translate_position requires the pointers on original input to match // so before changing anything, we want to count name position let input = &self.input[self.skip..]; - let start = namepos.0.translate_position(input); - let stop = namepos.1.translate_position(input); + let start = name_pos.0.translate_position(input); + let stop = name_pos.1.translate_position(input); + let tag_pos = tag_pos.map(|pos| pos.translate_position(input)); + // before touching the buffer calculate position to advance after name let metriclen = consumed - stop; @@ -224,7 +257,7 @@ F: Float + FromStr + AsPrimitive + FromF64 + Debug + Sync , self.skip = 0; - return Some((name, metric)); + return Some((MetricName::new(name, tag_pos), metric)); } Ok((Some(ParsedPart::Trash), consumed)) => { // trash matched @@ -236,7 +269,6 @@ F: Float + FromStr + AsPrimitive + FromF64 + Debug + Sync , self.skip += 1; } else { self.skip += consumed; - } self.handler.handle(self.input, self.skip); } @@ -261,7 +293,6 @@ F: Float + FromStr + AsPrimitive + FromF64 + Debug + Sync , // in that case we can try to skip all the bytes until those where error // happened - let input = &self.input[self.skip..]; let skip = e.position.translate_position(input); self.handler.handle(self.input, self.skip); @@ -275,7 +306,7 @@ F: Float + FromStr + AsPrimitive + FromF64 + Debug + Sync , Some(pos) => self.input.advance(pos), None => self.input.clear(), } - //cut all the input otherwise + //cut all the input otherwise } else { // cut buffer to position where error was found self.skip = 0; @@ -299,23 +330,27 @@ mod tests { impl ParseErrorHandler for TestParseErrorHandler { fn handle(&self, input: &[u8], pos: usize) { println!( - "parse error at {:?} in {:?}", pos, + "parse error at {:?} in {:?}", + pos, String::from_utf8_lossy(input) ); } } fn make_parser(input: &mut BytesMut) -> MetricParser { - MetricParser::::new( input, 100, TestParseErrorHandler) + MetricParser::::new(input, 100, TestParseErrorHandler) } #[test] fn parse_metric_good_counter() { let mut data = BytesMut::from(&b"gorets:1|c|@1"[..]); let mut parser = make_parser(&mut data); - let (name, metric) = parser.next().unwrap(); - assert_eq!(&name[..], &b"gorets"[..]); - assert_eq!(metric, Metric::::new(1f64, MetricType::Counter, None, Some(1f32)).unwrap()); + let (name, metric) = parser.next().unwrap(); + assert_eq!(&name.name[..], &b"gorets"[..]); + assert_eq!( + metric, + Metric::::new(1f64, MetricType::Counter, None, Some(1f32)).unwrap() + ); assert_eq!(parser.next(), None); } @@ -324,39 +359,57 @@ mod tests { fn parse_metric_good_counter_float() { let mut data = BytesMut::from(&b"gorets:12.65|c|@0.001"[..]); let mut parser = make_parser(&mut data); - let (name, metric) = parser.next().unwrap(); - assert_eq!(&name[..], &b"gorets"[..]); - assert_eq!(metric, Metric::::new(12.65f64, MetricType::Counter, None, Some(1e-3f32)).unwrap()); + let (name, metric) = parser.next().unwrap(); + assert_eq!(&name.name[..], &b"gorets"[..]); + assert_eq!( + metric, + Metric::::new(12.65f64, MetricType::Counter, None, Some(1e-3f32)).unwrap() + ); assert_eq!(parser.next(), None); } #[test] fn parse_metric_with_newline() { - let mut data = BytesMut::from(&b"complex.bioyino.test1:-1e10|g\n\ncomplex.bioyino.test10:-1e10|g\n\n\n"[..]); + let mut data = BytesMut::from( + &b"complex.bioyino.test1:-1e10|g\n\ncomplex.bioyino.test10:-1e10|g\n\n\n"[..], + ); let mut parser = make_parser(&mut data); - let (name, metric) = parser.next().unwrap(); - assert_eq!(&name[..], &b"complex.bioyino.test1"[..]); - assert_eq!(metric, Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap()); - - let (name, metric) = parser.next().unwrap(); - assert_eq!(&name[..], &b"complex.bioyino.test10"[..]); - assert_eq!(metric, Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap()); + let (name, metric) = parser.next().unwrap(); + assert_eq!(&name.name[..], &b"complex.bioyino.test1"[..]); + assert_eq!( + metric, + Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap() + ); + + let (name, metric) = parser.next().unwrap(); + assert_eq!(&name.name[..], &b"complex.bioyino.test10"[..]); + assert_eq!( + metric, + Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap() + ); assert_eq!(parser.next(), None); } #[test] fn parse_metric_without_newline() { - let mut data = BytesMut::from(&b"complex.bioyino.test1:-1e10|gcomplex.bioyino.test10:-1e10|g"[..]); + let mut data = + BytesMut::from(&b"complex.bioyino.test1:-1e10|gcomplex.bioyino.test10:-1e10|g"[..]); let mut parser = make_parser(&mut data); - let (name, metric) = parser.next().unwrap(); - assert_eq!(&name[..], &b"complex.bioyino.test1"[..]); - assert_eq!(metric, Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap()); - - let (name, metric) = parser.next().unwrap(); - assert_eq!(&name[..], &b"complex.bioyino.test10"[..]); - assert_eq!(metric, Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap()); + let (name, metric) = parser.next().unwrap(); + assert_eq!(&name.name[..], &b"complex.bioyino.test1"[..]); + assert_eq!( + metric, + Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap() + ); + + let (name, metric) = parser.next().unwrap(); + assert_eq!(&name.name[..], &b"complex.bioyino.test10"[..]); + assert_eq!( + metric, + Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap() + ); assert_eq!(parser.next(), None); } @@ -365,12 +418,18 @@ mod tests { fn parse_metric_without_newline_sampling() { let mut data = BytesMut::from(&b"gorets:+1000|g|@0.4e-3gorets:-1000|g|@0.5"[..]); let mut parser = make_parser(&mut data); - let (name, metric) = parser.next().unwrap(); - assert_eq!(&name[..], &b"gorets"[..]); - assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, Some(0.0004)).unwrap()); - let (name, metric) = parser.next().unwrap(); - assert_eq!(&name[..], &b"gorets"[..]); - assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap()); + let (name, metric) = parser.next().unwrap(); + assert_eq!(&name.name[..], &b"gorets"[..]); + assert_eq!( + metric, + Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, Some(0.0004)).unwrap() + ); + let (name, metric) = parser.next().unwrap(); + assert_eq!(&name.name[..], &b"gorets"[..]); + assert_eq!( + metric, + Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap() + ); assert_eq!(parser.next(), None); } @@ -378,9 +437,12 @@ mod tests { fn parse_metric_short() { let mut data = BytesMut::from(&b"gorets:1|c"[..]); let mut parser = make_parser(&mut data); - let (name, metric) = parser.next().unwrap(); - assert_eq!(&name[..], &b"gorets"[..]); - assert_eq!(metric, Metric::::new(1f64, MetricType::Counter, None, None).unwrap()); + let (name, metric) = parser.next().unwrap(); + assert_eq!(&name.name[..], &b"gorets"[..]); + assert_eq!( + metric, + Metric::::new(1f64, MetricType::Counter, None, None).unwrap() + ); assert_eq!(parser.next(), None); } @@ -388,16 +450,22 @@ mod tests { fn parse_metric_many() { let mut data = BytesMut::from(&b"gorets:+1000|g\ngorets:-1000|g|@0.5"[..]); let mut parser = make_parser(&mut data); - let (name, metric) = parser.next().unwrap(); - assert_eq!(&name[..], &b"gorets"[..]); - assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, None).unwrap()); - let (name, metric) = parser.next().unwrap(); - assert_eq!(&name[..], &b"gorets"[..]); - assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap()); + let (name, metric) = parser.next().unwrap(); + assert_eq!(&name.name[..], &b"gorets"[..]); + assert_eq!( + metric, + Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, None).unwrap() + ); + let (name, metric) = parser.next().unwrap(); + assert_eq!(&name.name[..], &b"gorets"[..]); + assert_eq!( + metric, + Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap() + ); assert_eq!(parser.next(), None); } -#[test] + #[test] fn parse_metric_bad_utf8() { use bytes::BufMut; let mut data = BytesMut::from(&b"borets1"[..]); @@ -410,10 +478,33 @@ mod tests { let (name, metric) = r.unwrap(); // Only one metric should be parsed - assert_eq!(&name[..], &b"gorets"[..]); - assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap()); + assert_eq!(&name.name[..], &b"gorets"[..]); + assert_eq!( + metric, + Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap() + ); } + #[test] + fn parse_metric_with_tags() { + let mut data = BytesMut::from(&b"gorets;a=b:+1000|g\ngorets:-1000|g|@0.5"[..]); + let mut parser = make_parser(&mut data); + let (name, metric) = parser.next().unwrap(); + // name is still full string, including tags + assert_eq!(&name.name[..], &b"gorets;a=b"[..]); + //assert_eq!(name.tag_pos, Some(5usize)); + assert_eq!(&name.name[name.tag_pos.unwrap()..], &b"a=b"[..]); + assert_eq!( + metric, + Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, None).unwrap() + ); + let (name, metric) = parser.next().unwrap(); + assert_eq!(&name.name[..], &b"gorets"[..]); + assert_eq!( + metric, + Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap() + ); + } #[test] fn parse_split_metric_buf() { @@ -423,11 +514,27 @@ mod tests { ); let correct = vec![ - (Bytes::from("gorets1"),Metric::::new(1001f64, MetricType::Gauge(Some(1)), None, None).unwrap()), - (Bytes::from("nuggets2"),Metric::::new(-1002f64, MetricType::Set(HashSet::new()), None, Some(0.5)).unwrap()), - (Bytes::from("gorets3"),Metric::::new(1003f64, MetricType::Gauge(Some(1)), None, None).unwrap()), - (Bytes::from("gorets4"),Metric::::new(1004f64, MetricType::Timer(Vec::new()), None, None).unwrap()), - (Bytes::from("gorets5"),Metric::::new(1005f64, MetricType::Timer(Vec::new()), None, None).unwrap()), + ( + Bytes::from("gorets1"), + Metric::::new(1001f64, MetricType::Gauge(Some(1)), None, None).unwrap(), + ), + ( + Bytes::from("nuggets2"), + Metric::::new(-1002f64, MetricType::Set(HashSet::new()), None, Some(0.5)) + .unwrap(), + ), + ( + Bytes::from("gorets3"), + Metric::::new(1003f64, MetricType::Gauge(Some(1)), None, None).unwrap(), + ), + ( + Bytes::from("gorets4"), + Metric::::new(1004f64, MetricType::Timer(Vec::new()), None, None).unwrap(), + ), + ( + Bytes::from("gorets5"), + Metric::::new(1005f64, MetricType::Timer(Vec::new()), None, None).unwrap(), + ), ]; for i in 1..(data.len() + 1) { // this is out test case - partially received data @@ -440,47 +547,73 @@ mod tests { let mut res = Vec::new(); // we use 20 as max_unparsed essentially to test total trash path - let parser = MetricParser::::new(&mut testinput, 20, TestParseErrorHandler); + let parser = MetricParser::::new( + &mut testinput, + 20, + TestParseErrorHandler, + ); for (name, metric) in parser { res.push((name, metric)); } println!("RES: {:?}", res); // until 15th gorets1 is not there, no metrics should be parsed - if i < 15 { assert!(res.len() == 0) } + if i < 15 { + assert!(res.len() == 0) + } // 15 and later gorets1 should always be parsed - if i >= 15 { assert!(res.len() > 0)} + if i >= 15 { + assert!(res.len() > 0) + } // between 15 and 43 ONLY gorets1 should be parsed - if i >= 15 && i < 43 { assert!(res.len() == 1)} + if i >= 15 && i < 43 { + assert!(res.len() == 1) + } // on 43 wild nuggets2 appears without sampling spec... - if i == 43 { assert!(res.len() == 2)} + if i == 43 { + assert!(res.len() == 2) + } // .. and disappears because parser understands there are more chars to be parsed - if i >= 44 && i < 46 { assert!(res.len() == 1)} + if i >= 44 && i < 46 { + assert!(res.len() == 1) + } // nuggets2:-1000|g|@0 is ok on 46 - if i == 46 { assert!(res.len() == 2)} + if i == 46 { + assert!(res.len() == 2) + } // nuggets2:-1000|g|@0. is not ok on 47 - if i == 47 { assert!(res.len() == 1)} + if i == 47 { + assert!(res.len() == 1) + } // 48 and forth both metrics must exist - if i > 47 && i < 80 { assert!(res.len() == 2)} + if i > 47 && i < 80 { + assert!(res.len() == 2) + } // after 79 there must be 3 metrics - if i >= 80 && i < 96 { assert!(res.len() == 3)} + if i >= 80 && i < 96 { + assert!(res.len() == 3) + } // after 97 there must be 4 metrics - if i >= 96 && i < 171 { assert!(res.len() == 4)} - if i >= 171 { assert!(res.len() == 5)} + if i >= 96 && i < 171 { + assert!(res.len() == 4) + } + if i >= 171 { + assert!(res.len() == 5) + } for (n, (cname, cmet)) in correct.iter().enumerate() { - if let Some((name,met)) = res.get(n) { - assert_eq!(cname, name); + if let Some((name, met)) = res.get(n) { + assert_eq!(cname, &name.name); if n == 1 && i == 43 || i == 46 { // nuggets is intentionally parsed in another way on some steps - continue + continue; } assert_eq!(cmet, met); } From 01c1c8abf852704fe840510674f1d3be39e7dc5c Mon Sep 17 00:00:00 2001 From: Sergey Noskov Date: Fri, 6 Sep 2019 18:14:31 +0300 Subject: [PATCH 02/23] Start trying to parse tags, errors yet --- src/parser.rs | 270 ++++++++++++++++++-------------------------------- 1 file changed, 99 insertions(+), 171 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 01581a7..e4a5248 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -8,11 +8,12 @@ use combine::byte::{byte, bytes, digit, newline}; use combine::combinator::{eof, skip_many}; use combine::error::{ParseError, StreamError}; use combine::parser::range::{recognize, take, take_until_range, take_while1}; +use combine::stream::easy; use combine::stream::{decode, PointerOffset, RangeStream, StreamErrorFor}; use combine::{choice, position}; use combine::{optional, skip_many1, Parser}; -use bytes::{Bytes, BytesMut}; +use bytes::BytesMut; use crate::metric::{FromF64, Metric, MetricType}; use crate::name::MetricName; @@ -24,13 +25,9 @@ pub enum ParsedPart where F: Float + FromStr + Debug + AsPrimitive, { - Metric( - (PointerOffset, PointerOffset), - Option, - Metric, - ), - Trash, - TotalTrash, + Metric((PointerOffset, PointerOffset), Option, Metric), + Trash(PointerOffset), + TotalTrash(PointerOffset), } /// The parser signature may seem cryptic, but it can mostly be ignored totally and get used as @@ -38,36 +35,32 @@ where /// (TODO: code example) /// /// It's current goal is to be fast, use less allocs and to not depend on error and even probably input typing -pub fn metric_stream_parser<'a, I, F>( - max_unparsed: usize, -) -> impl Parser, PartialState = impl Default + 'a> +pub fn metric_stream_parser<'a, I, F>(max_unparsed: usize, max_tags_len: usize) -> impl Parser, PartialState = impl Default + 'a> where I: RangeStream + std::fmt::Debug, I::Error: ParseError, F: 'a + Float + Debug + FromStr + AsPrimitive + FromF64 + Sync, ::Err: std::error::Error + Sync + Send + 'static, { + // empty comments help rustfmt with formatting + // to avoid allocations, we only find a position of a name in buffer let name_with_tags = ( + // position(), take_while1::(|c: u8| c != b';' && c != b':' && c != b'\n'), - optional(( - byte(b';'), - position(), - take_while1::(|c: u8| c != b':' && c != b'\n'), - )), + optional((byte(b';'), position(), take_while1::(|c: u8| c != b':' && c != b'\n'))), position(), ) .skip(byte(b':')) - .and_then(|(start, name, maybe_tag, stop)| { + .and_then(move |(start, name, maybe_tag, stop)| { //TODO: introduce metric name limits, use is.alphabetical for each unicode char - from_utf8(name).map_err(|_e| { - StreamErrorFor::::unexpected_static_message("name part is not valid utf8") - })?; + from_utf8(name).map_err(|_e| StreamErrorFor::::unexpected_static_message("name part is not valid utf8"))?; let tag_pos = if let Some((_, tag_pos, tag)) = maybe_tag { - from_utf8(tag).map_err(|_e| { - StreamErrorFor::::unexpected_static_message("tag part is not valid utf8") - })?; + if tag.len() > max_tags_len { + return Err(StreamErrorFor::::unexpected_static_message("tag part is too long")); + } + from_utf8(tag).map_err(|_e| StreamErrorFor::::unexpected_static_message("tag part is not valid utf8"))?; Some(tag_pos) } else { None @@ -79,15 +72,18 @@ where // This should parse metric value and separator let val = take_while1(|c: u8| c != b'|' && c != b'\n') + // .skip(byte(b'|')) .and_then(|value| { from_utf8(value) + // .map_err(StreamErrorFor::::other) .map(|v| v.parse::().map_err(StreamErrorFor::::other))? }); // This parses metric type let mtype = bytes(b"ms") + // .map(|_| MetricType::Timer(Vec::::new())) .or(byte(b'g').map(|_| MetricType::Gauge(None))) .or(byte(b'C').map(|_| MetricType::DiffCounter(F::zero()))) @@ -95,18 +91,16 @@ where .or(byte(b's').map(|_| MetricType::Set(HashSet::new()))); let unsigned_float = skip_many1(digit()) + // .and(optional((byte(b'.'), skip_many1(digit())))) - .and(optional(( - byte(b'e'), - optional(byte(b'+').or(byte(b'-'))), - skip_many1(digit()), - ))); + .and(optional( + // + (byte(b'e'), optional(byte(b'+').or(byte(b'-'))), skip_many1(digit())), + )); let sampling = (bytes(b"|@"), recognize(unsigned_float)).and_then(|(_, val)| { // TODO replace from_utf8 with handmade parser removing recognize - from_utf8(val) - .map_err(StreamErrorFor::::other) - .map(|v| v.parse::().map_err(StreamErrorFor::::other))? + from_utf8(val).map_err(StreamErrorFor::::other).map(|v| v.parse::().map_err(StreamErrorFor::::other))? }); let metric = ( @@ -137,22 +131,18 @@ where // here's what we are trying to parse choice(( // valid metric with (probably) tags - ( - skip_many(newline()), - name_with_tags, - metric, - skip_many(newline()), - ) - .map(|(_, (start, tag, stop), m, _)| ParsedPart::Metric((start, stop), tag, m)), - (take_until_range(&b"\n"[..]), skip_many1(newline())).map(|_| ParsedPart::Trash), + (skip_many(newline()), name_with_tags, metric, skip_many(newline())).map(|(_, (start, tag, stop), m, _)| ParsedPart::Metric((start, stop), tag, m)), + (take_until_range(&b"\n"[..]), position(), skip_many1(newline())).map(|(_, pos, _)| ParsedPart::Trash(pos)), // trash not ending with \n, but too long to be metric - take(max_unparsed).map(|_| ParsedPart::TotalTrash), + (take(max_unparsed), position()).map(|(_, pos)| ParsedPart::TotalTrash(pos)), )) } +type MetricParsingError<'a> = easy::Errors; + #[allow(unused_variables)] pub trait ParseErrorHandler { - fn handle(&self, buf: &[u8], pos: usize) {} + fn handle(&self, buf: &[u8], pos: usize, e: MetricParsingError) {} } pub struct DummyParseErrorHandler; @@ -165,6 +155,7 @@ pub struct MetricParser<'a, F, E: ParseErrorHandler> { input: &'a mut BytesMut, skip: usize, max_unparsed: usize, + max_tags_len: usize, handler: E, _pd: PhantomData, } @@ -173,14 +164,8 @@ impl<'a, F, E> MetricParser<'a, F, E> where E: ParseErrorHandler, { - pub fn new(input: &'a mut BytesMut, max_unparsed: usize, handler: E) -> Self { - Self { - input, - skip: 0, - max_unparsed, - handler, - _pd: PhantomData, - } + pub fn new(input: &'a mut BytesMut, max_unparsed: usize, max_tags_len: usize, handler: E) -> Self { + Self { input, skip: 0, max_unparsed, max_tags_len, handler, _pd: PhantomData } } } @@ -193,6 +178,7 @@ where type Item = (MetricName, Metric); fn next(&mut self) -> Option { loop { + dbg!("ITER"); if self.skip >= self.input.len() { return None; } @@ -200,17 +186,13 @@ where let res = { let input = &self.input[self.skip..]; - let parser = metric_stream_parser(self.max_unparsed); + let parser = metric_stream_parser(self.max_unparsed, self.max_tags_len); // let res = decode( //parser, //combine::stream::PartialStream(input), //&mut Default::default(), // ); - let res = decode( - parser, - combine::easy::Stream(input), - &mut Default::default(), - ); + let res = decode(parser, combine::easy::Stream(input), &mut Default::default()); res }; @@ -221,6 +203,8 @@ where return None; } Ok((Some(ParsedPart::Metric(name_pos, tag_pos, metric)), consumed)) => { + dbg!(&self.input); + dbg!(&consumed); // at this point our input buffer looks like this // [bad_data][useless_data][name][metric] // we consider a metric name WITH tags as a "name" here @@ -236,13 +220,14 @@ where let stop = name_pos.1.translate_position(input); let tag_pos = tag_pos.map(|pos| pos.translate_position(input)); + dbg!(&tag_pos); // before touching the buffer calculate position to advance after name let metriclen = consumed - stop; // there can be errors found before correct parsing // so we cut it off if self.skip > 0 { - self.handler.handle(self.input, self.skip); + self.handler.handle(self.input, self.skip, easy::Errors::empty(name_pos.0)); self.input.advance(self.skip); } @@ -251,7 +236,7 @@ where self.input.advance(start); // now we can cut the name itself - let name = self.input.split_to(stop - start).freeze(); + let name = self.input.split_to(stop - start); self.input.advance(metriclen); @@ -259,7 +244,7 @@ where return Some((MetricName::new(name, tag_pos), metric)); } - Ok((Some(ParsedPart::Trash), consumed)) => { + Ok((Some(ParsedPart::Trash(pos)), consumed)) => { // trash matched // skip it and continue, because // it can still be followed by metric @@ -270,9 +255,10 @@ where } else { self.skip += consumed; } - self.handler.handle(self.input, self.skip); + // TODO error information + self.handler.handle(self.input, self.skip, easy::Errors::empty(pos)); } - Ok((Some(ParsedPart::TotalTrash), consumed)) => { + Ok((Some(ParsedPart::TotalTrash(pos)), consumed)) => { // buffer is at max allowed length, but still no metrics there // break cutting buffer to length specified // this is the same action as trash, but a separate error message @@ -283,10 +269,12 @@ where self.skip = 0; // buffer can be more than max_unparsed, so we cut and continue - self.handler.handle(self.input, consumed); + // TODO error information + self.handler.handle(self.input, consumed, easy::Errors::empty(pos)); self.input.advance(consumed); } Err(e) => { + dbg!(&e); // error happens when no parsers match yet, i.e. some trash starts to come, // yet didn't reach the max_unparsed, but we already know it // cannot be metric @@ -295,7 +283,7 @@ where let input = &self.input[self.skip..]; let skip = e.position.translate_position(input); - self.handler.handle(self.input, self.skip); + self.handler.handle(self.input, self.skip, e); if skip == 0 { // error found at the very first byte // skip 1 byte and try again @@ -312,6 +300,7 @@ where self.skip = 0; self.input.advance(skip); } + dbg!(&self.input); } } } @@ -322,23 +311,20 @@ where mod tests { use super::*; + use bytes::Bytes; // TODO: Questioned cases: // * negative counters // * diff counters struct TestParseErrorHandler; impl ParseErrorHandler for TestParseErrorHandler { - fn handle(&self, input: &[u8], pos: usize) { - println!( - "parse error at {:?} in {:?}", - pos, - String::from_utf8_lossy(input) - ); + fn handle(&self, input: &[u8], pos: usize, e: MetricParsingError) { + println!("parse error at {:?} in {:?}: {:?}", pos, String::from_utf8_lossy(input), e); } } fn make_parser(input: &mut BytesMut) -> MetricParser { - MetricParser::::new(input, 100, TestParseErrorHandler) + MetricParser::::new(input, 100, 50, TestParseErrorHandler) } #[test] @@ -347,10 +333,7 @@ mod tests { let mut parser = make_parser(&mut data); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!( - metric, - Metric::::new(1f64, MetricType::Counter, None, Some(1f32)).unwrap() - ); + assert_eq!(metric, Metric::::new(1f64, MetricType::Counter, None, Some(1f32)).unwrap()); assert_eq!(parser.next(), None); } @@ -361,55 +344,37 @@ mod tests { let mut parser = make_parser(&mut data); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!( - metric, - Metric::::new(12.65f64, MetricType::Counter, None, Some(1e-3f32)).unwrap() - ); + assert_eq!(metric, Metric::::new(12.65f64, MetricType::Counter, None, Some(1e-3f32)).unwrap()); assert_eq!(parser.next(), None); } #[test] fn parse_metric_with_newline() { - let mut data = BytesMut::from( - &b"complex.bioyino.test1:-1e10|g\n\ncomplex.bioyino.test10:-1e10|g\n\n\n"[..], - ); + let mut data = BytesMut::from(&b"complex.bioyino.test1:-1e10|g\n\ncomplex.bioyino.test10:-1e10|g\n\n\n"[..]); let mut parser = make_parser(&mut data); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"complex.bioyino.test1"[..]); - assert_eq!( - metric, - Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap() - ); + assert_eq!(metric, Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap()); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"complex.bioyino.test10"[..]); - assert_eq!( - metric, - Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap() - ); + assert_eq!(metric, Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap()); assert_eq!(parser.next(), None); } #[test] fn parse_metric_without_newline() { - let mut data = - BytesMut::from(&b"complex.bioyino.test1:-1e10|gcomplex.bioyino.test10:-1e10|g"[..]); + let mut data = BytesMut::from(&b"complex.bioyino.test1:-1e10|gcomplex.bioyino.test10:-1e10|g"[..]); let mut parser = make_parser(&mut data); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"complex.bioyino.test1"[..]); - assert_eq!( - metric, - Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap() - ); + assert_eq!(metric, Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap()); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"complex.bioyino.test10"[..]); - assert_eq!( - metric, - Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap() - ); + assert_eq!(metric, Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap()); assert_eq!(parser.next(), None); } @@ -420,16 +385,10 @@ mod tests { let mut parser = make_parser(&mut data); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!( - metric, - Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, Some(0.0004)).unwrap() - ); + assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, Some(0.0004)).unwrap()); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!( - metric, - Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap() - ); + assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap()); assert_eq!(parser.next(), None); } @@ -439,10 +398,7 @@ mod tests { let mut parser = make_parser(&mut data); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!( - metric, - Metric::::new(1f64, MetricType::Counter, None, None).unwrap() - ); + assert_eq!(metric, Metric::::new(1f64, MetricType::Counter, None, None).unwrap()); assert_eq!(parser.next(), None); } @@ -452,16 +408,10 @@ mod tests { let mut parser = make_parser(&mut data); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!( - metric, - Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, None).unwrap() - ); + assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, None).unwrap()); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!( - metric, - Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap() - ); + assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap()); assert_eq!(parser.next(), None); } @@ -479,79 +429,57 @@ mod tests { // Only one metric should be parsed assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!( - metric, - Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap() - ); + assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap()); } #[test] fn parse_metric_with_tags() { - let mut data = BytesMut::from(&b"gorets;a=b:+1000|g\ngorets:-1000|g|@0.5"[..]); + let mut data = BytesMut::from(&b"gorets;a=b;c=d:+1000|g\ngorets:-1000|g|@0.5"[..]); let mut parser = make_parser(&mut data); let (name, metric) = parser.next().unwrap(); // name is still full string, including tags - assert_eq!(&name.name[..], &b"gorets;a=b"[..]); - //assert_eq!(name.tag_pos, Some(5usize)); - assert_eq!(&name.name[name.tag_pos.unwrap()..], &b"a=b"[..]); - assert_eq!( - metric, - Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, None).unwrap() - ); + assert_eq!(&name.name[..], &b"gorets;a=b;c=d"[..]); + assert_eq!(name.tag_pos, Some(7usize)); + assert_eq!(&name.name[name.tag_pos.unwrap()..], &b"a=b;c=d"[..]); + assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, None).unwrap()); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!( - metric, - Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap() - ); + assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap()); + } + + #[test] + fn parse_metric_with_long_tags() { + let mut data = BytesMut::from(&b"gorets;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b:+1000|g"[..]); + let mut parser = make_parser(&mut data); + assert!(parser.next().is_none()); + } + + #[test] + fn parse_many_metrics_with_long_tags() { + // metric with long tags followed by another metric + let mut data = BytesMut::from(&b"gorets;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b:+1000|g\nbobets;c=d:1000|g"[..]); + let mut parser = make_parser(&mut data); + let (name, metric) = parser.next().unwrap(); + assert_eq!(&name.name[..], &b"bobets;c=d"[..]); + //assert_eq!(name.tag_pos, Some(6usize)); + assert_eq!(&name.name[name.tag_pos.unwrap()..], &b"c=d"[..]); + assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, None).unwrap()); } #[test] fn parse_split_metric_buf() { let mut data = BytesMut::new(); - data.extend_from_slice( - b"gorets1:+1001|g\nT\x01RAi:|\x01SH\nnuggets2:-1002|s|@0.5\nMORETrasH\nFUUU\n\ngorets3:+1003|ggorets4:+1004|ms:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::ggorets5:1005|ms", - ); - - let correct = vec![ - ( - Bytes::from("gorets1"), - Metric::::new(1001f64, MetricType::Gauge(Some(1)), None, None).unwrap(), - ), - ( - Bytes::from("nuggets2"), - Metric::::new(-1002f64, MetricType::Set(HashSet::new()), None, Some(0.5)) - .unwrap(), - ), - ( - Bytes::from("gorets3"), - Metric::::new(1003f64, MetricType::Gauge(Some(1)), None, None).unwrap(), - ), - ( - Bytes::from("gorets4"), - Metric::::new(1004f64, MetricType::Timer(Vec::new()), None, None).unwrap(), - ), - ( - Bytes::from("gorets5"), - Metric::::new(1005f64, MetricType::Timer(Vec::new()), None, None).unwrap(), - ), - ]; + data.extend_from_slice(b"gorets1:+1001|g\nT\x01RAi:|\x01SH\nnuggets2:-1002|s|@0.5\nMORETrasH\nFUUU\n\ngorets3:+1003|ggorets4:+1004|ms:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::ggorets5:1005|ms"); + + let correct = vec![(Bytes::from("gorets1"), Metric::::new(1001f64, MetricType::Gauge(Some(1)), None, None).unwrap()), (Bytes::from("nuggets2"), Metric::::new(-1002f64, MetricType::Set(HashSet::new()), None, Some(0.5)).unwrap()), (Bytes::from("gorets3"), Metric::::new(1003f64, MetricType::Gauge(Some(1)), None, None).unwrap()), (Bytes::from("gorets4"), Metric::::new(1004f64, MetricType::Timer(Vec::new()), None, None).unwrap()), (Bytes::from("gorets5"), Metric::::new(1005f64, MetricType::Timer(Vec::new()), None, None).unwrap())]; for i in 1..(data.len() + 1) { // this is out test case - partially received data let mut testinput = BytesMut::from(&data[0..i]); - println!( - "TEST[{}] {:?}", - i, - String::from_utf8(Vec::from(&testinput[..])).unwrap() - ); + println!("TEST[{}] {:?}", i, String::from_utf8(Vec::from(&testinput[..])).unwrap()); let mut res = Vec::new(); // we use 20 as max_unparsed essentially to test total trash path - let parser = MetricParser::::new( - &mut testinput, - 20, - TestParseErrorHandler, - ); + let parser = MetricParser::::new(&mut testinput, 20, 20, TestParseErrorHandler); for (name, metric) in parser { res.push((name, metric)); } From b55cb461ba3e9ada41fbc1f6f8c5b981df48b9ca Mon Sep 17 00:00:00 2001 From: Sergey Noskov Date: Tue, 10 Sep 2019 07:45:40 +0300 Subject: [PATCH 03/23] Start trying to parse tags, errors yet --- src/name.rs | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 src/name.rs diff --git a/src/name.rs b/src/name.rs new file mode 100644 index 0000000..2cef9ce --- /dev/null +++ b/src/name.rs @@ -0,0 +1,47 @@ +use std::hash::{Hash, Hasher}; + +use bytes::BytesMut; + +pub enum TagMode { + Graphite, +} + +/// Contains buffer containing the full metric name including tags +/// and some data to split tags from name useful for appending aggregates +#[derive(Debug, PartialEq, Eq)] +pub struct MetricName { + pub name: BytesMut, + pub tag_pos: Option, +} + +impl MetricName { + pub fn new(name: BytesMut, tag_pos: Option) -> Self { + Self { name, tag_pos } + } + + pub fn find_tag_pos(&mut self) { + if self.tag_pos.is_none() { + self.tag_pos = self.name.iter().position(|c| *c == b';') + } + } + + pub fn normalize_tags(&mut self, mode: TagMode) { + let tag_pos = match self.tag_pos { + None => return, + Some(t) => t, + }; + + // TODO: handle sorting + // TODO: handle repeating same tags i.e. gorets;a=b;e=b;a=b:... + match mode { + TagMode::Graphite => unimplemented!(), + } + } +} + +impl Hash for MetricName { + fn hash(&self, state: &mut H) { + // metric with tag position found and tag position not found should be the same + self.name.hash(state); + } +} From 071d1b588dee256c8dfeff708b4755617be35c9c Mon Sep 17 00:00:00 2001 From: Sergey Noskov Date: Tue, 10 Sep 2019 09:25:55 +0300 Subject: [PATCH 04/23] Correct tag position, some code on tag sorting --- src/name.rs | 87 ++++++++++++++++--- src/parser.rs | 229 ++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 256 insertions(+), 60 deletions(-) diff --git a/src/name.rs b/src/name.rs index 2cef9ce..e7ab7e5 100644 --- a/src/name.rs +++ b/src/name.rs @@ -1,3 +1,4 @@ +use std::collections::BTreeMap; use std::hash::{Hash, Hasher}; use bytes::BytesMut; @@ -12,36 +13,94 @@ pub enum TagMode { pub struct MetricName { pub name: BytesMut, pub tag_pos: Option, + //pub tags: BTreeMap, // we need btreemap to have tags sorted } impl MetricName { pub fn new(name: BytesMut, tag_pos: Option) -> Self { - Self { name, tag_pos } + Self { + name, + tag_pos, /*tags: BTreeMap::new()*/ + } } - pub fn find_tag_pos(&mut self) { - if self.tag_pos.is_none() { + pub fn find_tag_pos(&mut self, force: bool) -> bool { + if force || self.tag_pos.is_none() { self.tag_pos = self.name.iter().position(|c| *c == b';') } + self.tag_pos.is_some() } - pub fn normalize_tags(&mut self, mode: TagMode) { - let tag_pos = match self.tag_pos { - None => return, - Some(t) => t, - }; - - // TODO: handle sorting - // TODO: handle repeating same tags i.e. gorets;a=b;e=b;a=b:... - match mode { - TagMode::Graphite => unimplemented!(), - } + /// sort tags in place using intermediate buffer, buffer length MUST be at least + /// `name.len() - tag_pos` bytes + /// sorting is made lexicographically + pub fn sort_tags>( + &mut self, + mode: TagMode, + intermediate: &mut B, + ) -> Result<(), ()> { + // TODO: think error type + Err(()) } + + // allocate tags structure and shorten name fetching tags into BTreeMap + //pub fn fetch_tags(&mut self, mode: TagMode) { + //let tag_pos = match self.tag_pos { + //None => return, + //Some(t) => t, + //}; + + //// TODO: handle sorting + //// TODO: handle repeating same tags i.e. gorets;a=b;e=b;a=b:... + //match mode { + //TagMode::Graphite => unimplemented!(), + //} + //} } impl Hash for MetricName { fn hash(&self, state: &mut H) { // metric with tag position found and tag position not found should be the same self.name.hash(state); + //if self.tags.len() > 0 { + //self.tags.hash(state); + //} + } +} +#[cfg(test)] +mod tests { + use super::*; + + use bytes::Bytes; + + #[test] + fn metric_name_tag_position() { + let mut name = MetricName::new(BytesMut::from(&b"gorets.bobez;a=b;c=d"[..]), Some(1)); //provide incorrect pos + name.find_tag_pos(true); + assert_eq!(name.tag_pos, Some(12)); + } + + #[test] + fn metric_name_sort_tags_graphite() { + let mut name = MetricName::new(BytesMut::from(&b"gorets.bobez;a=b;c=d"[..]), None); + name.find_tag_pos(false); + let _ = name.tag_pos.unwrap(); + + // let mut parser = make_parser(&mut data); + //let (name, metric) = parser.next().unwrap(); + //// name is still full string, including tags + //assert_eq!(&name.name[..], &b"gorets;a=b;c=d"[..]); + //assert_eq!(name.tag_pos, Some(7usize)); + //assert_eq!(&name.name[name.tag_pos.unwrap()..], &b"a=b;c=d"[..]); + //assert_eq!( + //metric, + //Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, None).unwrap() + //); + //let (name, metric) = parser.next().unwrap(); + //assert_eq!(&name.name[..], &b"gorets"[..]); + //assert_eq!( + //metric, + //Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap() + // ); } } diff --git a/src/parser.rs b/src/parser.rs index e4a5248..0798323 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -25,7 +25,11 @@ pub enum ParsedPart where F: Float + FromStr + Debug + AsPrimitive, { - Metric((PointerOffset, PointerOffset), Option, Metric), + Metric( + (PointerOffset, PointerOffset), + Option, + Metric, + ), Trash(PointerOffset), TotalTrash(PointerOffset), } @@ -35,7 +39,10 @@ where /// (TODO: code example) /// /// It's current goal is to be fast, use less allocs and to not depend on error and even probably input typing -pub fn metric_stream_parser<'a, I, F>(max_unparsed: usize, max_tags_len: usize) -> impl Parser, PartialState = impl Default + 'a> +pub fn metric_stream_parser<'a, I, F>( + max_unparsed: usize, + max_tags_len: usize, +) -> impl Parser, PartialState = impl Default + 'a> where I: RangeStream + std::fmt::Debug, I::Error: ParseError, @@ -49,18 +56,28 @@ where // position(), take_while1::(|c: u8| c != b';' && c != b':' && c != b'\n'), - optional((byte(b';'), position(), take_while1::(|c: u8| c != b':' && c != b'\n'))), + optional(( + byte(b';'), + position(), + take_while1::(|c: u8| c != b':' && c != b'\n'), + )), position(), ) .skip(byte(b':')) .and_then(move |(start, name, maybe_tag, stop)| { //TODO: introduce metric name limits, use is.alphabetical for each unicode char - from_utf8(name).map_err(|_e| StreamErrorFor::::unexpected_static_message("name part is not valid utf8"))?; + from_utf8(name).map_err(|_e| { + StreamErrorFor::::unexpected_static_message("name part is not valid utf8") + })?; let tag_pos = if let Some((_, tag_pos, tag)) = maybe_tag { if tag.len() > max_tags_len { - return Err(StreamErrorFor::::unexpected_static_message("tag part is too long")); + return Err(StreamErrorFor::::unexpected_static_message( + "tag part is too long", + )); } - from_utf8(tag).map_err(|_e| StreamErrorFor::::unexpected_static_message("tag part is not valid utf8"))?; + from_utf8(tag).map_err(|_e| { + StreamErrorFor::::unexpected_static_message("tag part is not valid utf8") + })?; Some(tag_pos) } else { None @@ -95,12 +112,18 @@ where .and(optional((byte(b'.'), skip_many1(digit())))) .and(optional( // - (byte(b'e'), optional(byte(b'+').or(byte(b'-'))), skip_many1(digit())), + ( + byte(b'e'), + optional(byte(b'+').or(byte(b'-'))), + skip_many1(digit()), + ), )); let sampling = (bytes(b"|@"), recognize(unsigned_float)).and_then(|(_, val)| { // TODO replace from_utf8 with handmade parser removing recognize - from_utf8(val).map_err(StreamErrorFor::::other).map(|v| v.parse::().map_err(StreamErrorFor::::other))? + from_utf8(val) + .map_err(StreamErrorFor::::other) + .map(|v| v.parse::().map_err(StreamErrorFor::::other))? }); let metric = ( @@ -131,8 +154,19 @@ where // here's what we are trying to parse choice(( // valid metric with (probably) tags - (skip_many(newline()), name_with_tags, metric, skip_many(newline())).map(|(_, (start, tag, stop), m, _)| ParsedPart::Metric((start, stop), tag, m)), - (take_until_range(&b"\n"[..]), position(), skip_many1(newline())).map(|(_, pos, _)| ParsedPart::Trash(pos)), + ( + skip_many(newline()), + name_with_tags, + metric, + skip_many(newline()), + ) + .map(|(_, (start, tag, stop), m, _)| ParsedPart::Metric((start, stop), tag, m)), + ( + take_until_range(&b"\n"[..]), + position(), + skip_many1(newline()), + ) + .map(|(_, pos, _)| ParsedPart::Trash(pos)), // trash not ending with \n, but too long to be metric (take(max_unparsed), position()).map(|(_, pos)| ParsedPart::TotalTrash(pos)), )) @@ -164,8 +198,20 @@ impl<'a, F, E> MetricParser<'a, F, E> where E: ParseErrorHandler, { - pub fn new(input: &'a mut BytesMut, max_unparsed: usize, max_tags_len: usize, handler: E) -> Self { - Self { input, skip: 0, max_unparsed, max_tags_len, handler, _pd: PhantomData } + pub fn new( + input: &'a mut BytesMut, + max_unparsed: usize, + max_tags_len: usize, + handler: E, + ) -> Self { + Self { + input, + skip: 0, + max_unparsed, + max_tags_len, + handler, + _pd: PhantomData, + } } } @@ -178,7 +224,6 @@ where type Item = (MetricName, Metric); fn next(&mut self) -> Option { loop { - dbg!("ITER"); if self.skip >= self.input.len() { return None; } @@ -192,7 +237,11 @@ where //combine::stream::PartialStream(input), //&mut Default::default(), // ); - let res = decode(parser, combine::easy::Stream(input), &mut Default::default()); + let res = decode( + parser, + combine::easy::Stream(input), + &mut Default::default(), + ); res }; @@ -203,8 +252,6 @@ where return None; } Ok((Some(ParsedPart::Metric(name_pos, tag_pos, metric)), consumed)) => { - dbg!(&self.input); - dbg!(&consumed); // at this point our input buffer looks like this // [bad_data][useless_data][name][metric] // we consider a metric name WITH tags as a "name" here @@ -218,16 +265,20 @@ where let input = &self.input[self.skip..]; let start = name_pos.0.translate_position(input); let stop = name_pos.1.translate_position(input); - let tag_pos = tag_pos.map(|pos| pos.translate_position(input)); - dbg!(&tag_pos); + // tag_pos is counted relative to input buffer + // but we need it to be relative to name + // to be related correctly, we have to shift it to `start` bytes right + let tag_pos = tag_pos.map(|pos| pos.translate_position(input) - start); + // before touching the buffer calculate position to advance after name let metriclen = consumed - stop; // there can be errors found before correct parsing // so we cut it off if self.skip > 0 { - self.handler.handle(self.input, self.skip, easy::Errors::empty(name_pos.0)); + self.handler + .handle(self.input, self.skip, easy::Errors::empty(name_pos.0)); self.input.advance(self.skip); } @@ -256,7 +307,8 @@ where self.skip += consumed; } // TODO error information - self.handler.handle(self.input, self.skip, easy::Errors::empty(pos)); + self.handler + .handle(self.input, self.skip, easy::Errors::empty(pos)); } Ok((Some(ParsedPart::TotalTrash(pos)), consumed)) => { // buffer is at max allowed length, but still no metrics there @@ -270,11 +322,11 @@ where // buffer can be more than max_unparsed, so we cut and continue // TODO error information - self.handler.handle(self.input, consumed, easy::Errors::empty(pos)); + self.handler + .handle(self.input, consumed, easy::Errors::empty(pos)); self.input.advance(consumed); } Err(e) => { - dbg!(&e); // error happens when no parsers match yet, i.e. some trash starts to come, // yet didn't reach the max_unparsed, but we already know it // cannot be metric @@ -300,7 +352,6 @@ where self.skip = 0; self.input.advance(skip); } - dbg!(&self.input); } } } @@ -319,7 +370,12 @@ mod tests { struct TestParseErrorHandler; impl ParseErrorHandler for TestParseErrorHandler { fn handle(&self, input: &[u8], pos: usize, e: MetricParsingError) { - println!("parse error at {:?} in {:?}: {:?}", pos, String::from_utf8_lossy(input), e); + println!( + "parse error at {:?} in {:?}: {:?}", + pos, + String::from_utf8_lossy(input), + e + ); } } @@ -333,7 +389,10 @@ mod tests { let mut parser = make_parser(&mut data); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!(metric, Metric::::new(1f64, MetricType::Counter, None, Some(1f32)).unwrap()); + assert_eq!( + metric, + Metric::::new(1f64, MetricType::Counter, None, Some(1f32)).unwrap() + ); assert_eq!(parser.next(), None); } @@ -344,37 +403,55 @@ mod tests { let mut parser = make_parser(&mut data); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!(metric, Metric::::new(12.65f64, MetricType::Counter, None, Some(1e-3f32)).unwrap()); + assert_eq!( + metric, + Metric::::new(12.65f64, MetricType::Counter, None, Some(1e-3f32)).unwrap() + ); assert_eq!(parser.next(), None); } #[test] fn parse_metric_with_newline() { - let mut data = BytesMut::from(&b"complex.bioyino.test1:-1e10|g\n\ncomplex.bioyino.test10:-1e10|g\n\n\n"[..]); + let mut data = BytesMut::from( + &b"complex.bioyino.test1:-1e10|g\n\ncomplex.bioyino.test10:-1e10|g\n\n\n"[..], + ); let mut parser = make_parser(&mut data); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"complex.bioyino.test1"[..]); - assert_eq!(metric, Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap()); + assert_eq!( + metric, + Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap() + ); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"complex.bioyino.test10"[..]); - assert_eq!(metric, Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap()); + assert_eq!( + metric, + Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap() + ); assert_eq!(parser.next(), None); } #[test] fn parse_metric_without_newline() { - let mut data = BytesMut::from(&b"complex.bioyino.test1:-1e10|gcomplex.bioyino.test10:-1e10|g"[..]); + let mut data = + BytesMut::from(&b"complex.bioyino.test1:-1e10|gcomplex.bioyino.test10:-1e10|g"[..]); let mut parser = make_parser(&mut data); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"complex.bioyino.test1"[..]); - assert_eq!(metric, Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap()); + assert_eq!( + metric, + Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap() + ); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"complex.bioyino.test10"[..]); - assert_eq!(metric, Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap()); + assert_eq!( + metric, + Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap() + ); assert_eq!(parser.next(), None); } @@ -385,10 +462,16 @@ mod tests { let mut parser = make_parser(&mut data); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, Some(0.0004)).unwrap()); + assert_eq!( + metric, + Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, Some(0.0004)).unwrap() + ); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap()); + assert_eq!( + metric, + Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap() + ); assert_eq!(parser.next(), None); } @@ -398,7 +481,10 @@ mod tests { let mut parser = make_parser(&mut data); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!(metric, Metric::::new(1f64, MetricType::Counter, None, None).unwrap()); + assert_eq!( + metric, + Metric::::new(1f64, MetricType::Counter, None, None).unwrap() + ); assert_eq!(parser.next(), None); } @@ -408,10 +494,16 @@ mod tests { let mut parser = make_parser(&mut data); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, None).unwrap()); + assert_eq!( + metric, + Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, None).unwrap() + ); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap()); + assert_eq!( + metric, + Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap() + ); assert_eq!(parser.next(), None); } @@ -429,7 +521,10 @@ mod tests { // Only one metric should be parsed assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap()); + assert_eq!( + metric, + Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap() + ); } #[test] @@ -441,15 +536,23 @@ mod tests { assert_eq!(&name.name[..], &b"gorets;a=b;c=d"[..]); assert_eq!(name.tag_pos, Some(7usize)); assert_eq!(&name.name[name.tag_pos.unwrap()..], &b"a=b;c=d"[..]); - assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, None).unwrap()); + assert_eq!( + metric, + Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, None).unwrap() + ); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap()); + assert_eq!( + metric, + Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap() + ); } #[test] fn parse_metric_with_long_tags() { - let mut data = BytesMut::from(&b"gorets;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b:+1000|g"[..]); + let mut data = BytesMut::from( + &b"gorets;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b:+1000|g"[..], + ); let mut parser = make_parser(&mut data); assert!(parser.next().is_none()); } @@ -461,9 +564,12 @@ mod tests { let mut parser = make_parser(&mut data); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"bobets;c=d"[..]); - //assert_eq!(name.tag_pos, Some(6usize)); + assert_eq!(name.tag_pos, Some(7usize)); assert_eq!(&name.name[name.tag_pos.unwrap()..], &b"c=d"[..]); - assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, None).unwrap()); + assert_eq!( + metric, + Metric::::new(1000f64, MetricType::Gauge(None), None, None).unwrap() + ); } #[test] @@ -471,15 +577,46 @@ mod tests { let mut data = BytesMut::new(); data.extend_from_slice(b"gorets1:+1001|g\nT\x01RAi:|\x01SH\nnuggets2:-1002|s|@0.5\nMORETrasH\nFUUU\n\ngorets3:+1003|ggorets4:+1004|ms:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::ggorets5:1005|ms"); - let correct = vec![(Bytes::from("gorets1"), Metric::::new(1001f64, MetricType::Gauge(Some(1)), None, None).unwrap()), (Bytes::from("nuggets2"), Metric::::new(-1002f64, MetricType::Set(HashSet::new()), None, Some(0.5)).unwrap()), (Bytes::from("gorets3"), Metric::::new(1003f64, MetricType::Gauge(Some(1)), None, None).unwrap()), (Bytes::from("gorets4"), Metric::::new(1004f64, MetricType::Timer(Vec::new()), None, None).unwrap()), (Bytes::from("gorets5"), Metric::::new(1005f64, MetricType::Timer(Vec::new()), None, None).unwrap())]; + let correct = vec![ + ( + Bytes::from("gorets1"), + Metric::::new(1001f64, MetricType::Gauge(Some(1)), None, None).unwrap(), + ), + ( + Bytes::from("nuggets2"), + Metric::::new(-1002f64, MetricType::Set(HashSet::new()), None, Some(0.5)) + .unwrap(), + ), + ( + Bytes::from("gorets3"), + Metric::::new(1003f64, MetricType::Gauge(Some(1)), None, None).unwrap(), + ), + ( + Bytes::from("gorets4"), + Metric::::new(1004f64, MetricType::Timer(Vec::new()), None, None).unwrap(), + ), + ( + Bytes::from("gorets5"), + Metric::::new(1005f64, MetricType::Timer(Vec::new()), None, None).unwrap(), + ), + ]; for i in 1..(data.len() + 1) { // this is out test case - partially received data let mut testinput = BytesMut::from(&data[0..i]); - println!("TEST[{}] {:?}", i, String::from_utf8(Vec::from(&testinput[..])).unwrap()); + println!( + "TEST[{}] {:?}", + i, + String::from_utf8(Vec::from(&testinput[..])).unwrap() + ); let mut res = Vec::new(); // we use 20 as max_unparsed essentially to test total trash path - let parser = MetricParser::::new(&mut testinput, 20, 20, TestParseErrorHandler); + let parser = MetricParser::::new( + &mut testinput, + 20, + 20, + TestParseErrorHandler, + ); for (name, metric) in parser { res.push((name, metric)); } From ad5a9cb0fbfdde5a5b95ca6342f629cfc22b9bf8 Mon Sep 17 00:00:00 2001 From: Sergey Noskov Date: Tue, 17 Sep 2019 08:25:35 +0300 Subject: [PATCH 05/23] Tag sorting works --- Cargo.toml | 1 + src/name.rs | 108 ++++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 81 insertions(+), 28 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index abfe87f..ee53176 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ capnp = "^0.10" combine="^3.6" bytes = { version = "^0.4", features = [ "serde" ] } num-traits="^0.2" +lazysort="^0.2" [build-dependencies] capnpc = "^0.10" diff --git a/src/name.rs b/src/name.rs index e7ab7e5..ee7df4a 100644 --- a/src/name.rs +++ b/src/name.rs @@ -1,8 +1,11 @@ -use std::collections::BTreeMap; use std::hash::{Hash, Hasher}; use bytes::BytesMut; +// TODO: think error type. There is single possible error atm, so sort_tags returns () instead +// TODO: think if we need sorted tags in btreemap instead of string +// TODO: handle repeating same tags i.e. gorets;a=b;e=b;a=b:... + pub enum TagMode { Graphite, } @@ -24,6 +27,7 @@ impl MetricName { } } + /// find position where tags start, forcing re-search when it's already found pub fn find_tag_pos(&mut self, force: bool) -> bool { if force || self.tag_pos.is_none() { self.tag_pos = self.name.iter().position(|c| *c == b';') @@ -31,16 +35,51 @@ impl MetricName { self.tag_pos.is_some() } + /// returns only name, without tags, considers tag position was already found before + pub fn without_tags(&self) -> &[u8] { + if let Some(pos) = self.tag_pos { + &self.name[..pos] + } else { + &self.name[..] + } + } + /// sort tags in place using intermediate buffer, buffer length MUST be at least /// `name.len() - tag_pos` bytes /// sorting is made lexicographically - pub fn sort_tags>( + pub fn sort_tags>( &mut self, mode: TagMode, intermediate: &mut B, ) -> Result<(), ()> { - // TODO: think error type - Err(()) + if self.tag_pos.is_none() { + if !self.find_tag_pos(true) { + return Err(()); + } + } + + let intermediate: &mut [u8] = intermediate.as_mut(); + if intermediate.len() < self.name.len() - self.tag_pos.unwrap() { + return Err(()); + } + use lazysort::Sorted; + match mode { + TagMode::Graphite => { + let mut offset = 0; + for part in self.name.split(|c| *c == b';').skip(1).sorted() { + let end = offset + part.len(); + intermediate[offset..end].copy_from_slice(part); + if end < intermediate.len() - 1 { + intermediate[end] = b';'; + } + offset = end + 1; + } + offset -= 1; + let tp = self.tag_pos.unwrap() + 1; + self.name[tp..].copy_from_slice(&intermediate[..offset]); + } + } + Ok(()) } // allocate tags structure and shorten name fetching tags into BTreeMap @@ -50,8 +89,6 @@ impl MetricName { //Some(t) => t, //}; - //// TODO: handle sorting - //// TODO: handle repeating same tags i.e. gorets;a=b;e=b;a=b:... //match mode { //TagMode::Graphite => unimplemented!(), //} @@ -62,45 +99,60 @@ impl Hash for MetricName { fn hash(&self, state: &mut H) { // metric with tag position found and tag position not found should be the same self.name.hash(state); - //if self.tags.len() > 0 { + //if Some(pos) = self.tag_pos { //self.tags.hash(state); //} } } + #[cfg(test)] mod tests { use super::*; - use bytes::Bytes; + use bytes::BufMut; #[test] fn metric_name_tag_position() { - let mut name = MetricName::new(BytesMut::from(&b"gorets.bobez;a=b;c=d"[..]), Some(1)); //provide incorrect pos + let mut name = MetricName::new(BytesMut::from(&b"gorets.bobez;a=b;c=d"[..]), Some(1)); name.find_tag_pos(true); assert_eq!(name.tag_pos, Some(12)); } #[test] - fn metric_name_sort_tags_graphite() { + fn metric_name_no_tags() { + let mut name = MetricName::new(BytesMut::from(&b"gorets.bobez"[..]), None); + name.find_tag_pos(true); + assert_eq!(name.without_tags(), &b"gorets.bobez"[..]); + let mut name = MetricName::new(BytesMut::from(&b"gorets.bobez;a=b;c=d"[..]), None); + name.find_tag_pos(true); + assert_eq!(name.without_tags(), &b"gorets.bobez"[..]); + } + + #[test] + fn metric_name_sort_tags_graphite() { + let mut name = MetricName::new( + BytesMut::from(&b"gorets.bobez;t=y;a=b;c=e;u=v;c=d;c=b;aaa=z"[..]), + None, + ); name.find_tag_pos(false); - let _ = name.tag_pos.unwrap(); - - // let mut parser = make_parser(&mut data); - //let (name, metric) = parser.next().unwrap(); - //// name is still full string, including tags - //assert_eq!(&name.name[..], &b"gorets;a=b;c=d"[..]); - //assert_eq!(name.tag_pos, Some(7usize)); - //assert_eq!(&name.name[name.tag_pos.unwrap()..], &b"a=b;c=d"[..]); - //assert_eq!( - //metric, - //Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, None).unwrap() - //); - //let (name, metric) = parser.next().unwrap(); - //assert_eq!(&name.name[..], &b"gorets"[..]); - //assert_eq!( - //metric, - //Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap() - // ); + let tag_pos = name.tag_pos.unwrap(); + + let tag_len = name.name.len() - tag_pos; + assert_eq!(tag_len, 30); + + //let mut intermediate = Vec::with_capacity(tag_len); + let mut intermediate = BytesMut::new(); + intermediate.resize(tag_len - 1, 0u8); // intentionally resize to less bytes than required + assert!(name + .sort_tags(TagMode::Graphite, &mut intermediate) + .is_err()); + + intermediate.put(0u8); + assert!(name.sort_tags(TagMode::Graphite, &mut intermediate).is_ok()); + assert_eq!( + &name.name[..], + &b"gorets.bobez;a=b;aaa=z;c=b;c=d;c=e;t=y;u=v"[..] + ); } } From 82479c09541c25e2fd99bfb1129c46f1deb11148 Mon Sep 17 00:00:00 2001 From: Sergey Noskov Date: Wed, 18 Sep 2019 09:54:09 +0300 Subject: [PATCH 06/23] Remove point from aggregation, make error public --- CHANGELOG.md | 5 ++++ Cargo.toml | 2 +- src/metric.rs | 66 +++++++++++++++++++++++---------------------------- src/parser.rs | 2 +- 4 files changed, 37 insertions(+), 38 deletions(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..8c79e11 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,5 @@ +# 0.2.0 + +* metric name is now a separate structure allowing to process tag information +* tags are considered being graphite format and can be sorted so tagged metrics are matched correctly together +* metric aggregation iterator now returns suffixes without leading dot for better usage as tags diff --git a/Cargo.toml b/Cargo.toml index ee53176..477651e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "bioyino-metric" -version = "0.1.0" +version = "0.2.0" authors = ["Sergey Noskov aka Albibek "] description = "Library for handling metrics in bioyino and related projects" edition = "2018" diff --git a/src/metric.rs b/src/metric.rs index e995c2c..eb31a16 100644 --- a/src/metric.rs +++ b/src/metric.rs @@ -6,11 +6,11 @@ use capnp; use capnp::message::{Allocator, Builder, HeapAllocator}; use failure::Error; use failure_derive::Fail; -use serde_derive::{Serialize, Deserialize}; +use serde_derive::{Deserialize, Serialize}; -use crate::protocol_capnp::{metric as cmetric, metric_type, gauge}; +use crate::protocol_capnp::{gauge, metric as cmetric, metric_type}; -use num_traits::{Float, AsPrimitive}; +use num_traits::{AsPrimitive, Float}; #[derive(Fail, Debug)] pub enum MetricError { @@ -34,7 +34,7 @@ pub enum MetricError { // vector must be sorted pub fn percentile(vec: &Vec, nth: F) -> F where -F: Float + AsPrimitive, + F: Float + AsPrimitive, { let last = F::from(vec.len() - 1).unwrap(); // usize to float should be ok for both f32 and f64 if last == F::zero() { @@ -61,7 +61,7 @@ F: Float + AsPrimitive, #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum MetricType where -F: Copy + PartialEq + Debug, + F: Copy + PartialEq + Debug, { Counter, DiffCounter(F), @@ -74,7 +74,7 @@ F: Copy + PartialEq + Debug, #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct Metric where -F: Copy + PartialEq + Debug, + F: Copy + PartialEq + Debug, { pub value: F, pub mtype: MetricType, @@ -83,20 +83,17 @@ F: Copy + PartialEq + Debug, pub sampling: Option, } -pub trait FromF64 -{ +pub trait FromF64 { fn from_f64(value: f64) -> Self; } -impl FromF64 for f64 -{ +impl FromF64 for f64 { fn from_f64(value: f64) -> Self { value } } -impl FromF64 for f32 -{ +impl FromF64 for f32 { // TODO specilaization will give us a possibility to use any other float the same way fn from_f64(value: f64) -> Self { let (mantissa, exponent, sign) = Float::integer_decode(value); @@ -109,14 +106,14 @@ impl FromF64 for f32 impl Metric where -F: Float + Debug + AsPrimitive + FromF64 + Sync, + F: Float + Debug + AsPrimitive + FromF64 + Sync, { pub fn new( value: F, mtype: MetricType, timestamp: Option, sampling: Option, - ) -> Result { + ) -> Result { let mut metric = Metric { value, mtype, @@ -180,10 +177,7 @@ F: Float + Debug + AsPrimitive + FromF64 + Sync, Ok(()) } - - pub fn from_capnp<'a>( - reader: cmetric::Reader<'a>, - ) -> Result<(Bytes, Metric), MetricError> { + pub fn from_capnp<'a>(reader: cmetric::Reader<'a>) -> Result<(Bytes, Metric), MetricError> { let name = reader.get_name().map_err(MetricError::Capnp)?.into(); let value: F = F::from_f64(reader.get_value()); @@ -229,7 +223,7 @@ F: Float + Debug + AsPrimitive + FromF64 + Sync, None }, Some(reader.get_update_counter()), - ), + ), Err(_) => (None, None), }; @@ -271,7 +265,7 @@ F: Float + Debug + AsPrimitive + FromF64 + Sync, let value: f64 = (*value).as_(); timer_builder.set(idx as u32, value); }) - .last(); + .last(); } MetricType::Set(ref v) => { let mut set_builder = t_builder.init_set(v.len() as u32); @@ -280,7 +274,7 @@ F: Float + Debug + AsPrimitive + FromF64 + Sync, .map(|(idx, value)| { set_builder.set(idx as u32, *value); }) - .last(); + .last(); } } } @@ -323,7 +317,7 @@ F: Float + Debug + AsPrimitive + FromF64 + Sync, impl IntoIterator for Metric where -F: Float + Debug + FromF64 + AsPrimitive + F: Float + Debug + FromF64 + AsPrimitive, { type Item = (&'static str, F); type IntoIter = MetricIter; @@ -334,7 +328,7 @@ F: Float + Debug + FromF64 + AsPrimitive pub struct MetricIter where -F: Float + Debug + FromF64 + AsPrimitive + F: Float + Debug + FromF64 + AsPrimitive, { m: Metric, count: usize, @@ -344,7 +338,7 @@ F: Float + Debug + FromF64 + AsPrimitive impl MetricIter where -F: Float + Debug + FromF64 + AsPrimitive + F: Float + Debug + FromF64 + AsPrimitive, { fn new(mut metric: Metric) -> Self { let sum = if let MetricType::Timer(ref mut agg) = metric.mtype { @@ -364,7 +358,7 @@ F: Float + Debug + FromF64 + AsPrimitive impl Iterator for MetricIter where -F: Float + Debug + FromF64 + AsPrimitive + F: Float + Debug + FromF64 + AsPrimitive, { type Item = (&'static str, F); @@ -375,25 +369,25 @@ F: Float + Debug + FromF64 + AsPrimitive &MetricType::Gauge(_) if self.count == 0 => Some(("", self.m.value.into())), &MetricType::Timer(ref agg) => { match self.count { - 0 => Some((".count", F::from_f64(agg.len() as f64) )), + 0 => Some(("count", F::from_f64(agg.len() as f64))), // agg.len() = 0 is impossible here because of metric creation logic. // For additional panic safety and to ensure unwrapping is safe here // this will return None interrupting the iteration and making other // aggregations unreachable since they are useless in that case 1 => agg.last().map(|last| (".last", (*last).into())), - 2 => Some((".min", agg[0])), - 3 => Some((".max", agg[agg.len() - 1])), - 4 => Some((".sum", self.timer_sum.unwrap())), - 5 => Some((".median", percentile(agg, F::from_f64(0.5)))), + 2 => Some(("min", agg[0])), + 3 => Some(("max", agg[agg.len() - 1])), + 4 => Some(("sum", self.timer_sum.unwrap())), + 5 => Some(("median", percentile(agg, F::from_f64(0.5)))), 6 => { let len: F = F::from_f64(agg.len() as f64); - Some((".mean", self.timer_sum.unwrap() / len)) + Some(("mean", self.timer_sum.unwrap() / len)) } - 7 => Some((".percentile.75", percentile(agg, F::from_f64(0.75)))), - 8 => Some((".percentile.95", percentile(agg, F::from_f64(0.95)))), - 9 => Some((".percentile.98", percentile(agg, F::from_f64(0.98)))), - 10 => Some((".percentile.99", percentile(agg, F::from_f64(0.99)))), - 11 => Some((".percentile.999", percentile(agg, F::from_f64(0.999)))), + 7 => Some(("percentile.75", percentile(agg, F::from_f64(0.75)))), + 8 => Some(("percentile.95", percentile(agg, F::from_f64(0.95)))), + 9 => Some(("percentile.98", percentile(agg, F::from_f64(0.98)))), + 10 => Some(("percentile.99", percentile(agg, F::from_f64(0.99)))), + 11 => Some(("percentile.999", percentile(agg, F::from_f64(0.999)))), _ => None, } } diff --git a/src/parser.rs b/src/parser.rs index 0798323..5fcc3e0 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -172,7 +172,7 @@ where )) } -type MetricParsingError<'a> = easy::Errors; +pub type MetricParsingError<'a> = easy::Errors; #[allow(unused_variables)] pub trait ParseErrorHandler { From 3c07642414035c0cc770a419802feb1dd36619ac Mon Sep 17 00:00:00 2001 From: Sergey Noskov Date: Fri, 4 Oct 2019 09:45:07 +0300 Subject: [PATCH 07/23] Capnp fixes and some useful functions --- src/metric.rs | 73 ++++++++------------------------------- src/name.rs | 94 ++++++++++++++++++++++++++++++++------------------- 2 files changed, 75 insertions(+), 92 deletions(-) diff --git a/src/metric.rs b/src/metric.rs index eb31a16..6ff1d61 100644 --- a/src/metric.rs +++ b/src/metric.rs @@ -1,13 +1,13 @@ use std::collections::HashSet; use std::fmt::Debug; -use bytes::Bytes; use capnp; use capnp::message::{Allocator, Builder, HeapAllocator}; use failure::Error; use failure_derive::Fail; use serde_derive::{Deserialize, Serialize}; +use crate::name::MetricName; use crate::protocol_capnp::{gauge, metric as cmetric, metric_type}; use num_traits::{AsPrimitive, Float}; @@ -108,19 +108,8 @@ impl Metric where F: Float + Debug + AsPrimitive + FromF64 + Sync, { - pub fn new( - value: F, - mtype: MetricType, - timestamp: Option, - sampling: Option, - ) -> Result { - let mut metric = Metric { - value, - mtype, - timestamp, - sampling, - update_counter: 1, - }; + pub fn new(value: F, mtype: MetricType, timestamp: Option, sampling: Option) -> Result { + let mut metric = Metric { value, mtype, timestamp, sampling, update_counter: 1 }; if let MetricType::Timer(ref mut agg) = metric.mtype { agg.push(metric.value) @@ -142,11 +131,7 @@ where // FIXME: this is most probably incorrect when joining with another // non-fresh metric count2 != 1 let prev = *previous; - let diff = if new.value > prev { - new.value - prev - } else { - new.value - }; + let diff = if new.value > prev { new.value - prev } else { new.value }; *previous = new.value; self.value = self.value + diff; } @@ -177,8 +162,10 @@ where Ok(()) } - pub fn from_capnp<'a>(reader: cmetric::Reader<'a>) -> Result<(Bytes, Metric), MetricError> { + pub fn from_capnp<'a>(reader: cmetric::Reader<'a>) -> Result<(MetricName, Metric), MetricError> { let name = reader.get_name().map_err(MetricError::Capnp)?.into(); + let mut name = MetricName::new(name, None); + name.find_tag_pos(true); let value: F = F::from_f64(reader.get_value()); let mtype = reader.get_type().map_err(MetricError::Capnp)?; @@ -206,36 +193,16 @@ where } }; - let timestamp = if reader.has_timestamp() { - Some(reader.get_timestamp().map_err(MetricError::Capnp)?.get_ts()) - } else { - None - }; + let timestamp = if reader.has_timestamp() { Some(reader.get_timestamp().map_err(MetricError::Capnp)?.get_ts()) } else { None }; let (sampling, up_counter) = match reader.get_meta() { - Ok(reader) => ( - if reader.has_sampling() { - reader - .get_sampling() - .ok() - .map(|reader| reader.get_sampling()) - } else { - None - }, - Some(reader.get_update_counter()), - ), + Ok(reader) => (if reader.has_sampling() { reader.get_sampling().ok().map(|reader| reader.get_sampling()) } else { None }, Some(reader.get_update_counter())), Err(_) => (None, None), }; // we should NOT use Metric::new here because it is not a newly created metric // we'd get duplicate value in timer/set metrics if we used new - let metric: Metric = Metric { - value: value, - mtype, - timestamp, - sampling, - update_counter: if let Some(c) = up_counter { c } else { 1 }, - }; + let metric: Metric = Metric { value: value, mtype, timestamp, sampling, update_counter: if let Some(c) = up_counter { c } else { 1 } }; Ok((name, metric)) } @@ -348,11 +315,7 @@ where } else { None }; - MetricIter { - m: metric, - count: 0, - timer_sum: sum, - } + MetricIter { m: metric, count: 0, timer_sum: sum } } } @@ -374,7 +337,7 @@ where // For additional panic safety and to ensure unwrapping is safe here // this will return None interrupting the iteration and making other // aggregations unreachable since they are useless in that case - 1 => agg.last().map(|last| (".last", (*last).into())), + 1 => agg.last().map(|last| ("last", (*last).into())), 2 => Some(("min", agg[0])), 3 => Some(("max", agg[agg.len() - 1])), 4 => Some(("sum", self.timer_sum.unwrap())), @@ -426,8 +389,7 @@ mod tests { #[test] fn test_metric_capnp_diffcounter() { - let mut metric1 = - Metric::new(1f64, MetricType::DiffCounter(0.1f64), Some(20), Some(0.2)).unwrap(); + let mut metric1 = Metric::new(1f64, MetricType::DiffCounter(0.1f64), Some(20), Some(0.2)).unwrap(); let metric2 = Metric::new(1f64, MetricType::DiffCounter(0.5f64), None, None).unwrap(); metric1.aggregate(metric2).unwrap(); capnp_test(metric1); @@ -435,15 +397,10 @@ mod tests { #[test] fn test_metric_capnp_timer() { - let mut metric1 = - Metric::new(1f64, MetricType::Timer(Vec::new()), Some(10), Some(0.1)).unwrap(); + let mut metric1 = Metric::new(1f64, MetricType::Timer(Vec::new()), Some(10), Some(0.1)).unwrap(); let metric2 = Metric::new(2f64, MetricType::Timer(vec![3f64]), None, None).unwrap(); metric1.aggregate(metric2).unwrap(); - assert!(if let MetricType::Timer(ref v) = metric1.mtype { - v.len() == 3 - } else { - false - }); + assert!(if let MetricType::Timer(ref v) = metric1.mtype { v.len() == 3 } else { false }); capnp_test(metric1); } diff --git a/src/name.rs b/src/name.rs index ee7df4a..722eb56 100644 --- a/src/name.rs +++ b/src/name.rs @@ -2,17 +2,20 @@ use std::hash::{Hash, Hasher}; use bytes::BytesMut; -// TODO: think error type. There is single possible error atm, so sort_tags returns () instead -// TODO: think if we need sorted tags in btreemap instead of string -// TODO: handle repeating same tags i.e. gorets;a=b;e=b;a=b:... - -pub enum TagMode { +// TODO: Think error type. There is single possible error atm, so sort_tags returns () instead +// TODO: Think if we need sorted tags in btreemap instead of string +// TODO: Handle repeating same tags i.e. gorets;a=b;e=b;a=b:... +// TODO: Split MetricName type to two: RawMetricName and MetricName, where the former is readonly +// and guarantees the tag position was already searched for, so we can remove those "expects tag position is +// found" everywhere + +pub enum TagFormat { Graphite, } /// Contains buffer containing the full metric name including tags /// and some data to split tags from name useful for appending aggregates -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone)] pub struct MetricName { pub name: BytesMut, pub tag_pos: Option, @@ -21,10 +24,7 @@ pub struct MetricName { impl MetricName { pub fn new(name: BytesMut, tag_pos: Option) -> Self { - Self { - name, - tag_pos, /*tags: BTreeMap::new()*/ - } + Self { name, tag_pos /*tags: BTreeMap::new()*/ } } /// find position where tags start, forcing re-search when it's already found @@ -36,7 +36,7 @@ impl MetricName { } /// returns only name, without tags, considers tag position was already found before - pub fn without_tags(&self) -> &[u8] { + pub fn name_without_tags(&self) -> &[u8] { if let Some(pos) = self.tag_pos { &self.name[..pos] } else { @@ -44,14 +44,35 @@ impl MetricName { } } + /// returns slice with full name, including tags + pub fn name_with_tags(&self) -> &[u8] { + &self.name[..] + } + + /// returns slice with only tags, includes leading semicolon, expects tag position was already + /// found + pub fn tags_without_name(&self) -> &[u8] { + if let Some(pos) = self.tag_pos { + &self.name[pos..] + } else { + &[] + } + } + + /// returns length of tags field, including leading semicolon + /// considers tag position was already found before + pub fn tags_len(&self) -> usize { + if let Some(pos) = self.tag_pos { + self.name.len() - pos + } else { + 0 + } + } + /// sort tags in place using intermediate buffer, buffer length MUST be at least /// `name.len() - tag_pos` bytes /// sorting is made lexicographically - pub fn sort_tags>( - &mut self, - mode: TagMode, - intermediate: &mut B, - ) -> Result<(), ()> { + pub fn sort_tags>(&mut self, mode: TagFormat, intermediate: &mut B) -> Result<(), ()> { if self.tag_pos.is_none() { if !self.find_tag_pos(true) { return Err(()); @@ -64,7 +85,7 @@ impl MetricName { } use lazysort::Sorted; match mode { - TagMode::Graphite => { + TagFormat::Graphite => { let mut offset = 0; for part in self.name.split(|c| *c == b';').skip(1).sorted() { let end = offset + part.len(); @@ -83,14 +104,14 @@ impl MetricName { } // allocate tags structure and shorten name fetching tags into BTreeMap - //pub fn fetch_tags(&mut self, mode: TagMode) { + //pub fn fetch_tags(&mut self, mode: TagFormat) { //let tag_pos = match self.tag_pos { //None => return, //Some(t) => t, //}; //match mode { - //TagMode::Graphite => unimplemented!(), + //TagFormat::Graphite => unimplemented!(), //} //} } @@ -119,22 +140,32 @@ mod tests { } #[test] - fn metric_name_no_tags() { + fn metric_name_tags_len() { + let mut name = MetricName::new(BytesMut::from(&b"gorets.bobez;a=b;c=d"[..]), None); + name.find_tag_pos(true); + assert_eq!(name.tags_len(), 8); + + let mut name = MetricName::new(BytesMut::from(&b"gorets.bobezzz"[..]), None); + name.find_tag_pos(true); + assert_eq!(name.tags_len(), 0); + } + + #[test] + fn metric_name_splits() { let mut name = MetricName::new(BytesMut::from(&b"gorets.bobez"[..]), None); name.find_tag_pos(true); - assert_eq!(name.without_tags(), &b"gorets.bobez"[..]); + assert_eq!(name.name_without_tags(), &b"gorets.bobez"[..]); + assert_eq!(name.tags_without_name().len(), 0); let mut name = MetricName::new(BytesMut::from(&b"gorets.bobez;a=b;c=d"[..]), None); name.find_tag_pos(true); - assert_eq!(name.without_tags(), &b"gorets.bobez"[..]); + assert_eq!(name.name_without_tags(), &b"gorets.bobez"[..]); + assert_eq!(name.tags_without_name(), &b";a=b;c=d"[..]); } #[test] fn metric_name_sort_tags_graphite() { - let mut name = MetricName::new( - BytesMut::from(&b"gorets.bobez;t=y;a=b;c=e;u=v;c=d;c=b;aaa=z"[..]), - None, - ); + let mut name = MetricName::new(BytesMut::from(&b"gorets.bobez;t=y;a=b;c=e;u=v;c=d;c=b;aaa=z"[..]), None); name.find_tag_pos(false); let tag_pos = name.tag_pos.unwrap(); @@ -144,15 +175,10 @@ mod tests { //let mut intermediate = Vec::with_capacity(tag_len); let mut intermediate = BytesMut::new(); intermediate.resize(tag_len - 1, 0u8); // intentionally resize to less bytes than required - assert!(name - .sort_tags(TagMode::Graphite, &mut intermediate) - .is_err()); + assert!(name.sort_tags(TagFormat::Graphite, &mut intermediate).is_err()); intermediate.put(0u8); - assert!(name.sort_tags(TagMode::Graphite, &mut intermediate).is_ok()); - assert_eq!( - &name.name[..], - &b"gorets.bobez;a=b;aaa=z;c=b;c=d;c=e;t=y;u=v"[..] - ); + assert!(name.sort_tags(TagFormat::Graphite, &mut intermediate).is_ok()); + assert_eq!(&name.name[..], &b"gorets.bobez;a=b;aaa=z;c=b;c=d;c=e;t=y;u=v"[..]); } } From cef98a457016b1c50d2e6ad6c05455fb63dcacfd Mon Sep 17 00:00:00 2001 From: Sergey Noskov Date: Fri, 1 Nov 2019 09:45:46 +0300 Subject: [PATCH 08/23] Add aggregation related name processing --- src/aggregate.rs | 152 +++++++++++++++++++++++++++++++++ src/lib.rs | 1 + src/metric.rs | 103 ++++++++++------------ src/name.rs | 181 ++++++++++++++++++++++++++++++++++++--- src/parser.rs | 218 +++++++++-------------------------------------- 5 files changed, 408 insertions(+), 247 deletions(-) create mode 100644 src/aggregate.rs diff --git a/src/aggregate.rs b/src/aggregate.rs new file mode 100644 index 0000000..6c55587 --- /dev/null +++ b/src/aggregate.rs @@ -0,0 +1,152 @@ +use std::fmt::Debug; +use std::hash::{Hash, Hasher}; + +use num_traits::{AsPrimitive, Float}; +use serde_derive::{Deserialize, Serialize}; + +use crate::metric::FromF64; + +// Percentile counter. Not safe. Requires at least two elements in vector +// vector must be sorted +pub fn percentile(vec: &[F], nth: F) -> F +where + F: Float + AsPrimitive, +{ + let last = F::from(vec.len() - 1).unwrap(); // usize to float should be ok for both f32 and f64 + if last == F::zero() { + return vec[0]; + } + + let k = nth * last; + let f = k.floor(); + let c = k.ceil(); + + if c == f { + // exact nth percentile have been found + return vec[k.as_()]; + } + + let m0 = c - k; + let m1 = k - f; + let d0 = vec[f.as_()] * m0; + let d1 = vec[c.as_()] * m1; + d0 + d1 +} + +fn fill_cached_sum(agg: &[F], sum: &mut Option) +where + F: Float, +{ + if sum.is_none() { + let first = if let Some(first) = agg.first() { first } else { return }; + *sum = Some(agg.iter().skip(1).fold(*first, |acc, &v| acc + v)) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum Aggregate +where + F: Float + Debug + FromF64 + AsPrimitive, +{ + Count, + Last, + Min, + Max, + Sum, + Median, + Mean, + UpdateCount, + AggregateTag, + Percentile(F), +} + +impl Hash for Aggregate +where + F: Float + Debug + FromF64 + AsPrimitive, +{ + fn hash(&self, state: &mut H) { + match self { + Aggregate::Percentile(f) => { + let u: usize = f.as_(); //.rotate_left(5); + u.hash(state) + } + p => p.as_usize().hash(state), + } + } +} + +impl Eq for Aggregate where F: Float + Debug + FromF64 + AsPrimitive {} + +// to be consistent with hasher, we do the comparison in the same way +impl PartialEq for Aggregate +where + F: Float + Debug + FromF64 + AsPrimitive, +{ + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Aggregate::Percentile(f), Aggregate::Percentile(o)) => { + let s: usize = f.as_().rotate_left(5); + let o: usize = o.as_().rotate_left(5); + s == o + } + (s, o) => s.as_usize() == o.as_usize(), + } + } +} + +impl Aggregate +where + F: Float + Debug + FromF64 + AsPrimitive, +{ + fn as_usize(&self) -> usize { + match self { + Aggregate::Count => 0, + Aggregate::Last => 1, + Aggregate::Min => 2, + Aggregate::Max => 3, + Aggregate::Sum => 4, + Aggregate::Median => 5, + Aggregate::Mean => 6, + Aggregate::UpdateCount => 7, + Aggregate::AggregateTag => 8, + // we need this for hashing and comparison, so we just use a value different from other + // enum values + // the second thing we need here is correctness, so nobody could send us some strange + // percentile value like inf or nan (maybe there will be no such case, but just for the + // sake of correctness we'd better do this + Aggregate::Percentile(p) if !p.is_finite() => std::usize::MAX, + Aggregate::Percentile(p) if *p > FromF64::from_f64(std::usize::MAX as f64) => std::usize::MAX, + Aggregate::Percentile(p) => (*p + FromF64::from_f64(10f64)).as_(), + } + } + /// calculates the corresponding aggregate from the input vector + /// may return none if length of agg if data is required for aggregate to be count + /// agg and cached_sum must relate to the same metric between calls, giving incorrect results or + /// panics otherwise + pub fn calculate(&self, agg: &[F], cached_sum: &mut Option, update_count: f64) -> Option { + match self { + Aggregate::Count => Some(F::from_f64(agg.len() as f64)), + Aggregate::Last => agg.last().copied(), + Aggregate::Min => Some(agg[0]), + Aggregate::Max => Some(agg[agg.len() - 1]), + Aggregate::Sum => { + fill_cached_sum(agg, cached_sum); + cached_sum.map(|sum| sum) + } + Aggregate::Median => Some(percentile(agg, F::from_f64(0.5))), + Aggregate::Mean => { + // the case with len = 0 and sum != None is real here, but we intentinally let it + // panic on division by zero to get incorrect usage from code to be explicit + fill_cached_sum(agg, cached_sum); + cached_sum.map(|sum| { + let len: F = F::from_f64(agg.len() as f64); + sum / len + }) + } + + Aggregate::UpdateCount => Some(F::from_f64(update_count)), + Aggregate::AggregateTag => None, + Aggregate::Percentile(ref p) => Some(percentile(agg, *p)), + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 0cb4e1e..8ab2711 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +pub mod aggregate; pub mod metric; pub mod name; pub mod parser; diff --git a/src/metric.rs b/src/metric.rs index 6ff1d61..c7028ac 100644 --- a/src/metric.rs +++ b/src/metric.rs @@ -7,6 +7,7 @@ use failure::Error; use failure_derive::Fail; use serde_derive::{Deserialize, Serialize}; +use crate::aggregate::{percentile, Aggregate}; use crate::name::MetricName; use crate::protocol_capnp::{gauge, metric as cmetric, metric_type}; @@ -30,34 +31,6 @@ pub enum MetricError { CapnpSchema(capnp::NotInSchema), } -// Percentile counter. Not safe. Requires at least two elements in vector -// vector must be sorted -pub fn percentile(vec: &Vec, nth: F) -> F -where - F: Float + AsPrimitive, -{ - let last = F::from(vec.len() - 1).unwrap(); // usize to float should be ok for both f32 and f64 - if last == F::zero() { - return vec[0].clone(); - } - - let k = nth * last; - let f = k.floor(); - let c = k.ceil(); - - if c == f { - // exact nth percentile have been found - return vec[k.as_()].clone(); - } - - let m0 = c - k; - let m1 = k - f; - let d0 = vec[f.as_()].clone() * m0; - let d1 = vec[c.as_()].clone() * m1; - let res = d0 + d1; - res -} - #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum MetricType where @@ -97,9 +70,9 @@ impl FromF64 for f32 { // TODO specilaization will give us a possibility to use any other float the same way fn from_f64(value: f64) -> Self { let (mantissa, exponent, sign) = Float::integer_decode(value); - let sign_f = sign as f32; + let sign_f = f32::from(sign); let mantissa_f = mantissa as f32; - let exponent_f = 2f32.powf(exponent as f32); + let exponent_f = 2f32.powf(f32::from(exponent)); sign_f * mantissa_f * exponent_f } } @@ -120,7 +93,8 @@ where Ok(metric) } - pub fn aggregate(&mut self, new: Metric) -> Result<(), Error> { + /// Join self with a new incoming metric depending on type + pub fn accumulate(&mut self, new: Metric) -> Result<(), Error> { use self::MetricType::*; self.update_counter += new.update_counter; match (&mut self.mtype, new.mtype) { @@ -162,7 +136,19 @@ where Ok(()) } - pub fn from_capnp<'a>(reader: cmetric::Reader<'a>) -> Result<(MetricName, Metric), MetricError> { + /// aggregates matric with apecified aggregate + pub fn aggregate(&self, agg: &Aggregate, cached_sum: &mut Option) -> Option + where + F: Float + Debug + AsPrimitive + AsPrimitive + FromF64 + Sync, + { + match self.mtype { + MetricType::Timer(ref values) => agg.calculate(values, cached_sum, f64::from(self.update_counter)), + _ if agg == &Aggregate::UpdateCount => agg.calculate(&Vec::new(), cached_sum, f64::from(self.update_counter)), + _ => None, + } + } + + pub fn from_capnp(reader: cmetric::Reader) -> Result<(MetricName, Metric), MetricError> { let name = reader.get_name().map_err(MetricError::Capnp)?.into(); let mut name = MetricName::new(name, None); name.find_tag_pos(true); @@ -202,7 +188,7 @@ where // we should NOT use Metric::new here because it is not a newly created metric // we'd get duplicate value in timer/set metrics if we used new - let metric: Metric = Metric { value: value, mtype, timestamp, sampling, update_counter: if let Some(c) = up_counter { c } else { 1 } }; + let metric: Metric = Metric { value, mtype, timestamp, sampling, update_counter: if let Some(c) = up_counter { c } else { 1 } }; Ok((name, metric)) } @@ -286,7 +272,7 @@ impl IntoIterator for Metric where F: Float + Debug + FromF64 + AsPrimitive, { - type Item = (&'static str, F); + type Item = (Option>, F); type IntoIter = MetricIter; fn into_iter(self) -> Self::IntoIter { MetricIter::new(self) @@ -323,38 +309,39 @@ impl Iterator for MetricIter where F: Float + Debug + FromF64 + AsPrimitive, { - type Item = (&'static str, F); + type Item = (Option>, F); fn next(&mut self) -> Option { - let res: Option = match &self.m.mtype { - &MetricType::Counter if self.count == 0 => Some(("", self.m.value.into())), - &MetricType::DiffCounter(_) if self.count == 0 => Some(("", self.m.value.into())), - &MetricType::Gauge(_) if self.count == 0 => Some(("", self.m.value.into())), - &MetricType::Timer(ref agg) => { + let res: Option = match self.m.mtype { + MetricType::Counter if self.count == 0 => Some((None, self.m.value)), + MetricType::DiffCounter(_) if self.count == 0 => Some((None, self.m.value)), + MetricType::Gauge(_) if self.count == 0 => Some((None, self.m.value)), + MetricType::Timer(ref agg) => { match self.count { - 0 => Some(("count", F::from_f64(agg.len() as f64))), + 0 => Some((Some(Aggregate::Count), F::from_f64(agg.len() as f64))), // agg.len() = 0 is impossible here because of metric creation logic. // For additional panic safety and to ensure unwrapping is safe here // this will return None interrupting the iteration and making other // aggregations unreachable since they are useless in that case - 1 => agg.last().map(|last| ("last", (*last).into())), - 2 => Some(("min", agg[0])), - 3 => Some(("max", agg[agg.len() - 1])), - 4 => Some(("sum", self.timer_sum.unwrap())), - 5 => Some(("median", percentile(agg, F::from_f64(0.5)))), + 1 => agg.last().map(|last| (Some(Aggregate::Last), (*last))), + 2 => Some((Some(Aggregate::Min), agg[0])), + 3 => Some((Some(Aggregate::Max), agg[agg.len() - 1])), + 4 => Some((Some(Aggregate::Sum), self.timer_sum.unwrap())), + 5 => Some((Some(Aggregate::Median), percentile(agg, F::from_f64(0.5)))), 6 => { let len: F = F::from_f64(agg.len() as f64); - Some(("mean", self.timer_sum.unwrap() / len)) + Some((Some(Aggregate::Mean), self.timer_sum.unwrap() / len)) } - 7 => Some(("percentile.75", percentile(agg, F::from_f64(0.75)))), - 8 => Some(("percentile.95", percentile(agg, F::from_f64(0.95)))), - 9 => Some(("percentile.98", percentile(agg, F::from_f64(0.98)))), - 10 => Some(("percentile.99", percentile(agg, F::from_f64(0.99)))), - 11 => Some(("percentile.999", percentile(agg, F::from_f64(0.999)))), + 7 => Some((Some(Aggregate::UpdateCount), F::from_f64(f64::from(self.m.update_counter)))), + 8 => Some((Some(Aggregate::Percentile(F::from_f64(0.75))), percentile(agg, F::from_f64(0.75)))), + 9 => Some((Some(Aggregate::Percentile(F::from_f64(0.95))), percentile(agg, F::from_f64(0.95)))), + 10 => Some((Some(Aggregate::Percentile(F::from_f64(0.98))), percentile(agg, F::from_f64(0.98)))), + 11 => Some((Some(Aggregate::Percentile(F::from_f64(0.99))), percentile(agg, F::from_f64(0.99)))), + 12 => Some((Some(Aggregate::Percentile(F::from_f64(0.999))), percentile(agg, F::from_f64(0.999)))), _ => None, } } - &MetricType::Set(ref hs) if self.count == 0 => Some(("", F::from_f64(hs.len() as f64))), + MetricType::Set(ref hs) if self.count == 0 => Some((None, F::from_f64(hs.len() as f64))), _ => None, }; self.count += 1; @@ -383,7 +370,7 @@ mod tests { fn test_metric_capnp_counter() { let mut metric1 = Metric::new(1f64, MetricType::Counter, Some(10), Some(0.1)).unwrap(); let metric2 = Metric::new(2f64, MetricType::Counter, None, None).unwrap(); - metric1.aggregate(metric2).unwrap(); + metric1.accumulate(metric2).unwrap(); capnp_test(metric1); } @@ -391,7 +378,7 @@ mod tests { fn test_metric_capnp_diffcounter() { let mut metric1 = Metric::new(1f64, MetricType::DiffCounter(0.1f64), Some(20), Some(0.2)).unwrap(); let metric2 = Metric::new(1f64, MetricType::DiffCounter(0.5f64), None, None).unwrap(); - metric1.aggregate(metric2).unwrap(); + metric1.accumulate(metric2).unwrap(); capnp_test(metric1); } @@ -399,7 +386,7 @@ mod tests { fn test_metric_capnp_timer() { let mut metric1 = Metric::new(1f64, MetricType::Timer(Vec::new()), Some(10), Some(0.1)).unwrap(); let metric2 = Metric::new(2f64, MetricType::Timer(vec![3f64]), None, None).unwrap(); - metric1.aggregate(metric2).unwrap(); + metric1.accumulate(metric2).unwrap(); assert!(if let MetricType::Timer(ref v) = metric1.mtype { v.len() == 3 } else { false }); capnp_test(metric1); @@ -409,7 +396,7 @@ mod tests { fn test_metric_capnp_gauge() { let mut metric1 = Metric::new(1f64, MetricType::Gauge(None), Some(10), Some(0.1)).unwrap(); let metric2 = Metric::new(2f64, MetricType::Gauge(Some(-1)), None, None).unwrap(); - metric1.aggregate(metric2).unwrap(); + metric1.accumulate(metric2).unwrap(); capnp_test(metric1); } @@ -422,7 +409,7 @@ mod tests { let mut set2 = HashSet::new(); set2.extend(vec![10u64, 30u64].into_iter()); let metric2 = Metric::new(2f64, MetricType::Set(set2), None, None).unwrap(); - metric1.aggregate(metric2).unwrap(); + metric1.accumulate(metric2).unwrap(); capnp_test(metric1); } diff --git a/src/name.rs b/src/name.rs index 722eb56..7d45bf5 100644 --- a/src/name.rs +++ b/src/name.rs @@ -1,6 +1,13 @@ +use std::collections::HashMap; +use std::fmt::Debug; use std::hash::{Hash, Hasher}; -use bytes::BytesMut; +use bytes::{BufMut, BytesMut}; +use num_traits::{AsPrimitive, Float}; +use serde_derive::{Deserialize, Serialize}; + +use crate::aggregate::Aggregate; +use crate::metric::FromF64; // TODO: Think error type. There is single possible error atm, so sort_tags returns () instead // TODO: Think if we need sorted tags in btreemap instead of string @@ -9,13 +16,26 @@ use bytes::BytesMut; // and guarantees the tag position was already searched for, so we can remove those "expects tag position is // found" everywhere +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] pub enum TagFormat { Graphite, } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +pub enum AggregationDestination { + /// place depending on tags existence, if they do - always place in tag + Smart, + /// always place aggregate as postfix in name separated with dot + Name, + /// always place aggregate into tag, adding ones if they are not there yet + Tag, + /// always place aggregate in both tag and name postfix + Both, +} + /// Contains buffer containing the full metric name including tags /// and some data to split tags from name useful for appending aggregates -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, Eq, Clone)] pub struct MetricName { pub name: BytesMut, pub tag_pos: Option, @@ -69,14 +89,103 @@ impl MetricName { } } + /// put name into buffer with suffix added with dot after name + fn put_with_suffix(&self, buf: &mut BytesMut, suffix: &[u8]) { + let suflen = suffix.len(); + let namelen = self.name.len(); + + buf.reserve(namelen + suflen + 1); + match self.tag_pos { + None => { + buf.put_slice(&self.name); + buf.put(b'.'); + buf.put_slice(suffix); + } + Some(pos) => { + buf.put_slice(&self.name[..pos]); + buf.put(b'.'); + buf.put_slice(suffix); + buf.put_slice(&self.name[pos..]); + } + } + } + + fn put_with_fixed_tag(&self, buf: &mut BytesMut, tag_name: &[u8], tag: &[u8], only_tag: bool) { + let namelen = self.name.len(); + let semicolon = self.name[namelen - 1] == b';'; + let mut addlen = tag_name.len() + tag.len() + 1; // 1 is for `=` + if !only_tag { + addlen += namelen + } + if !semicolon { + addlen += 1 // add one more for `;` + }; + buf.reserve(addlen); + if !only_tag { + buf.put_slice(&self.name); + } + if !semicolon { + buf.put(b';'); + } + buf.put_slice(tag_name); + buf.put(b'='); + buf.put_slice(tag); + } + + //fn put_with_value_tag(&self, buf: &mut BytesMut, tag: &[u8], value: f64) { + // let namelen = self.name.len(); + + //FIXME + /* + let mut addlen = namelen + agglen + 1 + 32; // 1 is for `=`, 32 is for the float value itself + if !semicolon { + addlen += 1 // add one more for `;` + }; + buf.reserve(addlen); + buf.put_slice(&self.name); + if !semicolon { + buf.put(b';'); + } + buf.put_slice(agg_name); + buf.put(b'='); + buf.put_slice(&self.name[pos..]); + */ + //} + + /// puts a name with an aggregate to provided buffer depending on dest + /// expects tag_pos to be already found + pub fn put_with_aggregate(&self, buf: &mut BytesMut, dest: AggregationDestination, agg: Aggregate, replacements: &HashMap, String>) -> Result<(), ()> + where + F: Float + Debug + FromF64 + AsPrimitive, + { + let agg_name = replacements.get(&agg).ok_or(())?.as_bytes(); + let agg_tag = replacements.get(&Aggregate::AggregateTag).ok_or(())?.as_bytes(); + + match dest { + AggregationDestination::Smart if self.tag_pos.is_none() => { + // metric is untagged, add aggregate to name + Ok(self.put_with_suffix(buf, agg_name)) + } + AggregationDestination::Smart => { + // metric is tagged, add aggregate as tag + Ok(self.put_with_fixed_tag(buf, agg_tag, agg_name, false)) + } + AggregationDestination::Name => Ok(self.put_with_suffix(buf, agg_name)), + AggregationDestination::Tag => Ok(self.put_with_fixed_tag(buf, agg_tag, agg_name, false)), + AggregationDestination::Both => { + self.put_with_suffix(buf, agg_name); + self.put_with_fixed_tag(buf, agg_tag, agg_name, true); + Ok(()) + } + } + } + /// sort tags in place using intermediate buffer, buffer length MUST be at least /// `name.len() - tag_pos` bytes /// sorting is made lexicographically pub fn sort_tags>(&mut self, mode: TagFormat, intermediate: &mut B) -> Result<(), ()> { - if self.tag_pos.is_none() { - if !self.find_tag_pos(true) { - return Err(()); - } + if self.tag_pos.is_none() && !self.find_tag_pos(true) { + return Err(()); } let intermediate: &mut [u8] = intermediate.as_mut(); @@ -116,13 +225,16 @@ impl MetricName { //} } +impl PartialEq for MetricName { + fn eq(&self, other: &Self) -> bool { + // metric with tag position found and tag position not found should be the same + self.name == other.name + } +} + impl Hash for MetricName { fn hash(&self, state: &mut H) { - // metric with tag position found and tag position not found should be the same self.name.hash(state); - //if Some(pos) = self.tag_pos { - //self.tags.hash(state); - //} } } @@ -163,6 +275,55 @@ mod tests { assert_eq!(name.tags_without_name(), &b";a=b;c=d"[..]); } + #[test] + fn metric_aggregate_modes() { + // create some replacements + let mut replacements: HashMap, String> = HashMap::new(); + replacements.insert(Aggregate::Count, "count".to_string()); + replacements.insert(Aggregate::AggregateTag, "aggregate".to_string()); + + let mut without_tags = MetricName::new(BytesMut::from(&b"gorets.bobez"[..]), None); + let mut with_tags = MetricName::new(BytesMut::from(&b"gorets.bobez;tag=value"[..]), None); + let mut with_semicolon = MetricName::new(BytesMut::from(&b"gorets.bobez;"[..]), None); + without_tags.find_tag_pos(true); + with_tags.find_tag_pos(true); + with_semicolon.find_tag_pos(true); + + // create 0-size buffer to make sure allocation counts work as intended + let mut buf = BytesMut::new(); + + assert!(without_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, Aggregate::Max, &replacements).is_err()); + + without_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, Aggregate::Count, &replacements).unwrap(); + assert_eq!(&buf.take()[..], &b"gorets.bobez.count"[..]); + + without_tags.put_with_aggregate(&mut buf, AggregationDestination::Name, Aggregate::Count, &replacements).unwrap(); + assert_eq!(&buf.take()[..], &b"gorets.bobez.count"[..]); + + without_tags.put_with_aggregate(&mut buf, AggregationDestination::Tag, Aggregate::Count, &replacements).unwrap(); + assert_eq!(&buf.take()[..], &b"gorets.bobez;aggregate=count"[..]); + + without_tags.put_with_aggregate(&mut buf, AggregationDestination::Both, Aggregate::Count, &replacements).unwrap(); + dbg!(&buf); + assert_eq!(&buf.take()[..], &b"gorets.bobez.count;aggregate=count"[..]); + + with_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, Aggregate::Count, &replacements).unwrap(); + assert_eq!(&buf.take()[..], &b"gorets.bobez;tag=value;aggregate=count"[..]); + + with_tags.put_with_aggregate(&mut buf, AggregationDestination::Name, Aggregate::Count, &replacements).unwrap(); + assert_eq!(&buf.take()[..], &b"gorets.bobez.count;tag=value"[..]); + + with_tags.put_with_aggregate(&mut buf, AggregationDestination::Tag, Aggregate::Count, &replacements).unwrap(); + assert_eq!(&buf.take()[..], &b"gorets.bobez;tag=value;aggregate=count"[..]); + + with_tags.put_with_aggregate(&mut buf, AggregationDestination::Both, Aggregate::Count, &replacements).unwrap(); + assert_eq!(&buf.take()[..], &b"gorets.bobez.count;tag=value;aggregate=count"[..]); + + // ensure trailing semicolon is not duplicated + with_semicolon.put_with_aggregate(&mut buf, AggregationDestination::Smart, Aggregate::Count, &replacements).unwrap(); + assert_eq!(&buf.take()[..], &b"gorets.bobez;aggregate=count"[..]); + } + #[test] fn metric_name_sort_tags_graphite() { let mut name = MetricName::new(BytesMut::from(&b"gorets.bobez;t=y;a=b;c=e;u=v;c=d;c=b;aaa=z"[..]), None); diff --git a/src/parser.rs b/src/parser.rs index 5fcc3e0..f35c084 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -25,11 +25,7 @@ pub enum ParsedPart where F: Float + FromStr + Debug + AsPrimitive, { - Metric( - (PointerOffset, PointerOffset), - Option, - Metric, - ), + Metric((PointerOffset, PointerOffset), Option, Metric), Trash(PointerOffset), TotalTrash(PointerOffset), } @@ -39,10 +35,7 @@ where /// (TODO: code example) /// /// It's current goal is to be fast, use less allocs and to not depend on error and even probably input typing -pub fn metric_stream_parser<'a, I, F>( - max_unparsed: usize, - max_tags_len: usize, -) -> impl Parser, PartialState = impl Default + 'a> +pub fn metric_stream_parser<'a, I, F>(max_unparsed: usize, max_tags_len: usize) -> impl Parser, PartialState = impl Default + 'a> where I: RangeStream + std::fmt::Debug, I::Error: ParseError, @@ -56,28 +49,18 @@ where // position(), take_while1::(|c: u8| c != b';' && c != b':' && c != b'\n'), - optional(( - byte(b';'), - position(), - take_while1::(|c: u8| c != b':' && c != b'\n'), - )), + optional((byte(b';'), position(), take_while1::(|c: u8| c != b':' && c != b'\n'))), position(), ) .skip(byte(b':')) .and_then(move |(start, name, maybe_tag, stop)| { //TODO: introduce metric name limits, use is.alphabetical for each unicode char - from_utf8(name).map_err(|_e| { - StreamErrorFor::::unexpected_static_message("name part is not valid utf8") - })?; + from_utf8(name).map_err(|_e| StreamErrorFor::::unexpected_static_message("name part is not valid utf8"))?; let tag_pos = if let Some((_, tag_pos, tag)) = maybe_tag { if tag.len() > max_tags_len { - return Err(StreamErrorFor::::unexpected_static_message( - "tag part is too long", - )); + return Err(StreamErrorFor::::unexpected_static_message("tag part is too long")); } - from_utf8(tag).map_err(|_e| { - StreamErrorFor::::unexpected_static_message("tag part is not valid utf8") - })?; + from_utf8(tag).map_err(|_e| StreamErrorFor::::unexpected_static_message("tag part is not valid utf8"))?; Some(tag_pos) } else { None @@ -112,18 +95,12 @@ where .and(optional((byte(b'.'), skip_many1(digit())))) .and(optional( // - ( - byte(b'e'), - optional(byte(b'+').or(byte(b'-'))), - skip_many1(digit()), - ), + (byte(b'e'), optional(byte(b'+').or(byte(b'-'))), skip_many1(digit())), )); let sampling = (bytes(b"|@"), recognize(unsigned_float)).and_then(|(_, val)| { // TODO replace from_utf8 with handmade parser removing recognize - from_utf8(val) - .map_err(StreamErrorFor::::other) - .map(|v| v.parse::().map_err(StreamErrorFor::::other))? + from_utf8(val).map_err(StreamErrorFor::::other).map(|v| v.parse::().map_err(StreamErrorFor::::other))? }); let metric = ( @@ -131,7 +108,7 @@ where val, mtype, choice(( - sampling.map(|v| Some(v)), + sampling.map(Some), skip_many(newline()).map(|_| None), eof().map(|_| None), //skip_many(newline()).map(|_| None), @@ -154,19 +131,8 @@ where // here's what we are trying to parse choice(( // valid metric with (probably) tags - ( - skip_many(newline()), - name_with_tags, - metric, - skip_many(newline()), - ) - .map(|(_, (start, tag, stop), m, _)| ParsedPart::Metric((start, stop), tag, m)), - ( - take_until_range(&b"\n"[..]), - position(), - skip_many1(newline()), - ) - .map(|(_, pos, _)| ParsedPart::Trash(pos)), + (skip_many(newline()), name_with_tags, metric, skip_many(newline())).map(|(_, (start, tag, stop), m, _)| ParsedPart::Metric((start, stop), tag, m)), + (take_until_range(&b"\n"[..]), position(), skip_many1(newline())).map(|(_, pos, _)| ParsedPart::Trash(pos)), // trash not ending with \n, but too long to be metric (take(max_unparsed), position()).map(|(_, pos)| ParsedPart::TotalTrash(pos)), )) @@ -198,20 +164,8 @@ impl<'a, F, E> MetricParser<'a, F, E> where E: ParseErrorHandler, { - pub fn new( - input: &'a mut BytesMut, - max_unparsed: usize, - max_tags_len: usize, - handler: E, - ) -> Self { - Self { - input, - skip: 0, - max_unparsed, - max_tags_len, - handler, - _pd: PhantomData, - } + pub fn new(input: &'a mut BytesMut, max_unparsed: usize, max_tags_len: usize, handler: E) -> Self { + Self { input, skip: 0, max_unparsed, max_tags_len, handler, _pd: PhantomData } } } @@ -237,12 +191,7 @@ where //combine::stream::PartialStream(input), //&mut Default::default(), // ); - let res = decode( - parser, - combine::easy::Stream(input), - &mut Default::default(), - ); - res + decode(parser, combine::easy::Stream(input), &mut Default::default()) }; match res { @@ -277,8 +226,7 @@ where // there can be errors found before correct parsing // so we cut it off if self.skip > 0 { - self.handler - .handle(self.input, self.skip, easy::Errors::empty(name_pos.0)); + self.handler.handle(self.input, self.skip, easy::Errors::empty(name_pos.0)); self.input.advance(self.skip); } @@ -307,8 +255,7 @@ where self.skip += consumed; } // TODO error information - self.handler - .handle(self.input, self.skip, easy::Errors::empty(pos)); + self.handler.handle(self.input, self.skip, easy::Errors::empty(pos)); } Ok((Some(ParsedPart::TotalTrash(pos)), consumed)) => { // buffer is at max allowed length, but still no metrics there @@ -322,8 +269,7 @@ where // buffer can be more than max_unparsed, so we cut and continue // TODO error information - self.handler - .handle(self.input, consumed, easy::Errors::empty(pos)); + self.handler.handle(self.input, consumed, easy::Errors::empty(pos)); self.input.advance(consumed); } Err(e) => { @@ -370,12 +316,7 @@ mod tests { struct TestParseErrorHandler; impl ParseErrorHandler for TestParseErrorHandler { fn handle(&self, input: &[u8], pos: usize, e: MetricParsingError) { - println!( - "parse error at {:?} in {:?}: {:?}", - pos, - String::from_utf8_lossy(input), - e - ); + println!("parse error at {:?} in {:?}: {:?}", pos, String::from_utf8_lossy(input), e); } } @@ -389,10 +330,7 @@ mod tests { let mut parser = make_parser(&mut data); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!( - metric, - Metric::::new(1f64, MetricType::Counter, None, Some(1f32)).unwrap() - ); + assert_eq!(metric, Metric::::new(1f64, MetricType::Counter, None, Some(1f32)).unwrap()); assert_eq!(parser.next(), None); } @@ -403,55 +341,37 @@ mod tests { let mut parser = make_parser(&mut data); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!( - metric, - Metric::::new(12.65f64, MetricType::Counter, None, Some(1e-3f32)).unwrap() - ); + assert_eq!(metric, Metric::::new(12.65f64, MetricType::Counter, None, Some(1e-3f32)).unwrap()); assert_eq!(parser.next(), None); } #[test] fn parse_metric_with_newline() { - let mut data = BytesMut::from( - &b"complex.bioyino.test1:-1e10|g\n\ncomplex.bioyino.test10:-1e10|g\n\n\n"[..], - ); + let mut data = BytesMut::from(&b"complex.bioyino.test1:-1e10|g\n\ncomplex.bioyino.test10:-1e10|g\n\n\n"[..]); let mut parser = make_parser(&mut data); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"complex.bioyino.test1"[..]); - assert_eq!( - metric, - Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap() - ); + assert_eq!(metric, Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap()); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"complex.bioyino.test10"[..]); - assert_eq!( - metric, - Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap() - ); + assert_eq!(metric, Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap()); assert_eq!(parser.next(), None); } #[test] fn parse_metric_without_newline() { - let mut data = - BytesMut::from(&b"complex.bioyino.test1:-1e10|gcomplex.bioyino.test10:-1e10|g"[..]); + let mut data = BytesMut::from(&b"complex.bioyino.test1:-1e10|gcomplex.bioyino.test10:-1e10|g"[..]); let mut parser = make_parser(&mut data); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"complex.bioyino.test1"[..]); - assert_eq!( - metric, - Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap() - ); + assert_eq!(metric, Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap()); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"complex.bioyino.test10"[..]); - assert_eq!( - metric, - Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap() - ); + assert_eq!(metric, Metric::::new(1e10f64, MetricType::Gauge(Some(-1)), None, None).unwrap()); assert_eq!(parser.next(), None); } @@ -462,16 +382,10 @@ mod tests { let mut parser = make_parser(&mut data); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!( - metric, - Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, Some(0.0004)).unwrap() - ); + assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, Some(0.0004)).unwrap()); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!( - metric, - Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap() - ); + assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap()); assert_eq!(parser.next(), None); } @@ -481,10 +395,7 @@ mod tests { let mut parser = make_parser(&mut data); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!( - metric, - Metric::::new(1f64, MetricType::Counter, None, None).unwrap() - ); + assert_eq!(metric, Metric::::new(1f64, MetricType::Counter, None, None).unwrap()); assert_eq!(parser.next(), None); } @@ -494,16 +405,10 @@ mod tests { let mut parser = make_parser(&mut data); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!( - metric, - Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, None).unwrap() - ); + assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, None).unwrap()); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!( - metric, - Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap() - ); + assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap()); assert_eq!(parser.next(), None); } @@ -521,10 +426,7 @@ mod tests { // Only one metric should be parsed assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!( - metric, - Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap() - ); + assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap()); } #[test] @@ -536,23 +438,15 @@ mod tests { assert_eq!(&name.name[..], &b"gorets;a=b;c=d"[..]); assert_eq!(name.tag_pos, Some(7usize)); assert_eq!(&name.name[name.tag_pos.unwrap()..], &b"a=b;c=d"[..]); - assert_eq!( - metric, - Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, None).unwrap() - ); + assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, None).unwrap()); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"gorets"[..]); - assert_eq!( - metric, - Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap() - ); + assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap()); } #[test] fn parse_metric_with_long_tags() { - let mut data = BytesMut::from( - &b"gorets;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b:+1000|g"[..], - ); + let mut data = BytesMut::from(&b"gorets;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b:+1000|g"[..]); let mut parser = make_parser(&mut data); assert!(parser.next().is_none()); } @@ -566,10 +460,7 @@ mod tests { assert_eq!(&name.name[..], &b"bobets;c=d"[..]); assert_eq!(name.tag_pos, Some(7usize)); assert_eq!(&name.name[name.tag_pos.unwrap()..], &b"c=d"[..]); - assert_eq!( - metric, - Metric::::new(1000f64, MetricType::Gauge(None), None, None).unwrap() - ); + assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(None), None, None).unwrap()); } #[test] @@ -577,46 +468,15 @@ mod tests { let mut data = BytesMut::new(); data.extend_from_slice(b"gorets1:+1001|g\nT\x01RAi:|\x01SH\nnuggets2:-1002|s|@0.5\nMORETrasH\nFUUU\n\ngorets3:+1003|ggorets4:+1004|ms:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::ggorets5:1005|ms"); - let correct = vec![ - ( - Bytes::from("gorets1"), - Metric::::new(1001f64, MetricType::Gauge(Some(1)), None, None).unwrap(), - ), - ( - Bytes::from("nuggets2"), - Metric::::new(-1002f64, MetricType::Set(HashSet::new()), None, Some(0.5)) - .unwrap(), - ), - ( - Bytes::from("gorets3"), - Metric::::new(1003f64, MetricType::Gauge(Some(1)), None, None).unwrap(), - ), - ( - Bytes::from("gorets4"), - Metric::::new(1004f64, MetricType::Timer(Vec::new()), None, None).unwrap(), - ), - ( - Bytes::from("gorets5"), - Metric::::new(1005f64, MetricType::Timer(Vec::new()), None, None).unwrap(), - ), - ]; + let correct = vec![(Bytes::from("gorets1"), Metric::::new(1001f64, MetricType::Gauge(Some(1)), None, None).unwrap()), (Bytes::from("nuggets2"), Metric::::new(-1002f64, MetricType::Set(HashSet::new()), None, Some(0.5)).unwrap()), (Bytes::from("gorets3"), Metric::::new(1003f64, MetricType::Gauge(Some(1)), None, None).unwrap()), (Bytes::from("gorets4"), Metric::::new(1004f64, MetricType::Timer(Vec::new()), None, None).unwrap()), (Bytes::from("gorets5"), Metric::::new(1005f64, MetricType::Timer(Vec::new()), None, None).unwrap())]; for i in 1..(data.len() + 1) { // this is out test case - partially received data let mut testinput = BytesMut::from(&data[0..i]); - println!( - "TEST[{}] {:?}", - i, - String::from_utf8(Vec::from(&testinput[..])).unwrap() - ); + println!("TEST[{}] {:?}", i, String::from_utf8(Vec::from(&testinput[..])).unwrap()); let mut res = Vec::new(); // we use 20 as max_unparsed essentially to test total trash path - let parser = MetricParser::::new( - &mut testinput, - 20, - 20, - TestParseErrorHandler, - ); + let parser = MetricParser::::new(&mut testinput, 20, 20, TestParseErrorHandler); for (name, metric) in parser { res.push((name, metric)); } From 6c91458acb3619c7cf75471f800d5a384b1a5894 Mon Sep 17 00:00:00 2001 From: Sergey Noskov Date: Fri, 8 Nov 2019 09:58:24 +0300 Subject: [PATCH 09/23] Add easier string to aggregate parsing --- src/aggregate.rs | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/src/aggregate.rs b/src/aggregate.rs index 6c55587..e39198c 100644 --- a/src/aggregate.rs +++ b/src/aggregate.rs @@ -1,5 +1,7 @@ +use std::convert::TryFrom; use std::fmt::Debug; use std::hash::{Hash, Hasher}; +use std::str::FromStr; use num_traits::{AsPrimitive, Float}; use serde_derive::{Deserialize, Serialize}; @@ -44,6 +46,7 @@ where } #[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case", deny_unknown_fields, try_from = "String")] pub enum Aggregate where F: Float + Debug + FromF64 + AsPrimitive, @@ -56,10 +59,46 @@ where Median, Mean, UpdateCount, + #[serde(skip)] AggregateTag, Percentile(F), } +impl TryFrom for Aggregate +where + F: Float + Debug + FromF64 + AsPrimitive, +{ + type Error = String; + + fn try_from(s: String) -> Result { + match s.as_str() { + "count" => Ok(Aggregate::Count), + "last" => Ok(Aggregate::Last), + "min" => Ok(Aggregate::Min), + "max" => Ok(Aggregate::Max), + "sum" => Ok(Aggregate::Sum), + "median" => Ok(Aggregate::Median), + "updates" => Ok(Aggregate::UpdateCount), + s if s.starts_with("percentile-") => { + // check in match guarantees minus char exists + let pos = s.chars().position(|c| c == '-').unwrap() + 1; + let num: u64 = u64::from_str(&s[pos..]).map_err(|_| "percentile value is not unsigned integer".to_owned())?; + let mut divider = 10f64; + + let num = num as f64; + // divider is f64, so it's always bigger than u64:MAX and therefore never + // overflow + while num > divider { + divider *= 10.0; + } + + Ok(Aggregate::Percentile(F::from_f64(num / divider))) + } + _ => Err("".into()), + } + } +} + impl Hash for Aggregate where F: Float + Debug + FromF64 + AsPrimitive, From d709f291da5f42520f1df22393ee773c2b1927c2 Mon Sep 17 00:00:00 2001 From: Sergey Noskov Date: Fri, 8 Nov 2019 09:58:52 +0300 Subject: [PATCH 10/23] Tag position parsing fix --- src/name.rs | 26 +++++++++++++++++++++----- src/parser.rs | 33 +++++++++++++++++++++++++-------- 2 files changed, 46 insertions(+), 13 deletions(-) diff --git a/src/name.rs b/src/name.rs index 7d45bf5..9798dde 100644 --- a/src/name.rs +++ b/src/name.rs @@ -35,6 +35,7 @@ pub enum AggregationDestination { /// Contains buffer containing the full metric name including tags /// and some data to split tags from name useful for appending aggregates +/// #[derive(Debug, Eq, Clone)] pub struct MetricName { pub name: BytesMut, @@ -47,10 +48,12 @@ impl MetricName { Self { name, tag_pos /*tags: BTreeMap::new()*/ } } - /// find position where tags start, forcing re-search when it's already found + // TODO example + /// find position where tags start, optionally forcing re-search when it's already found + /// Note, that found position points to the first semicolon, not the tags part itself pub fn find_tag_pos(&mut self, force: bool) -> bool { if force || self.tag_pos.is_none() { - self.tag_pos = self.name.iter().position(|c| *c == b';') + self.tag_pos = self.name.iter().position(|c| *c == b';'); } self.tag_pos.is_some() } @@ -185,13 +188,16 @@ impl MetricName { /// sorting is made lexicographically pub fn sort_tags>(&mut self, mode: TagFormat, intermediate: &mut B) -> Result<(), ()> { if self.tag_pos.is_none() && !self.find_tag_pos(true) { - return Err(()); + // tag position was not found, so no tags + // but it is ok since we have nothing to sort + return Ok(()); } let intermediate: &mut [u8] = intermediate.as_mut(); - if intermediate.len() < self.name.len() - self.tag_pos.unwrap() { + if intermediate.len() < (self.name.len() - self.tag_pos.unwrap()) { return Err(()); } + use lazysort::Sorted; match mode { TagFormat::Graphite => { @@ -206,6 +212,7 @@ impl MetricName { } offset -= 1; let tp = self.tag_pos.unwrap() + 1; + self.name[tp..].copy_from_slice(&intermediate[..offset]); } } @@ -326,6 +333,14 @@ mod tests { #[test] fn metric_name_sort_tags_graphite() { + let mut intermediate: Vec = Vec::new(); + let mut name = MetricName::new(BytesMut::from(&b"gorets2;tag3=shit;t2=fuck"[..]), None); + name.find_tag_pos(false); + if intermediate.len() < name.name.len() { + intermediate.resize(name.name.len(), 0u8); + } + assert!(name.sort_tags(TagFormat::Graphite, &mut intermediate).is_ok()); + let mut name = MetricName::new(BytesMut::from(&b"gorets.bobez;t=y;a=b;c=e;u=v;c=d;c=b;aaa=z"[..]), None); name.find_tag_pos(false); let tag_pos = name.tag_pos.unwrap(); @@ -334,11 +349,12 @@ mod tests { assert_eq!(tag_len, 30); //let mut intermediate = Vec::with_capacity(tag_len); + let mut intermediate = BytesMut::new(); intermediate.resize(tag_len - 1, 0u8); // intentionally resize to less bytes than required assert!(name.sort_tags(TagFormat::Graphite, &mut intermediate).is_err()); - intermediate.put(0u8); + intermediate.put(0u8); // resize to good length now assert!(name.sort_tags(TagFormat::Graphite, &mut intermediate).is_ok()); assert_eq!(&name.name[..], &b"gorets.bobez;a=b;aaa=z;c=b;c=d;c=e;t=y;u=v"[..]); } diff --git a/src/parser.rs b/src/parser.rs index f35c084..6dfa8e2 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -132,9 +132,9 @@ where choice(( // valid metric with (probably) tags (skip_many(newline()), name_with_tags, metric, skip_many(newline())).map(|(_, (start, tag, stop), m, _)| ParsedPart::Metric((start, stop), tag, m)), - (take_until_range(&b"\n"[..]), position(), skip_many1(newline())).map(|(_, pos, _)| ParsedPart::Trash(pos)), + (take_until_range(&b"\n"[..]), skip_many(newline()), position()).map(|(_, _, pos)| ParsedPart::Trash(pos)), // trash not ending with \n, but too long to be metric - (take(max_unparsed), position()).map(|(_, pos)| ParsedPart::TotalTrash(pos)), + (take(max_unparsed), skip_many(newline()), position()).map(|(_, _, pos)| ParsedPart::TotalTrash(pos)), )) } @@ -218,7 +218,7 @@ where // tag_pos is counted relative to input buffer // but we need it to be relative to name // to be related correctly, we have to shift it to `start` bytes right - let tag_pos = tag_pos.map(|pos| pos.translate_position(input) - start); + let tag_pos = tag_pos.map(|pos| pos.translate_position(input) - start - 1); // before touching the buffer calculate position to advance after name let metriclen = consumed - stop; @@ -436,8 +436,8 @@ mod tests { let (name, metric) = parser.next().unwrap(); // name is still full string, including tags assert_eq!(&name.name[..], &b"gorets;a=b;c=d"[..]); - assert_eq!(name.tag_pos, Some(7usize)); - assert_eq!(&name.name[name.tag_pos.unwrap()..], &b"a=b;c=d"[..]); + assert_eq!(name.tag_pos, Some(6usize)); + assert_eq!(&name.name[name.tag_pos.unwrap()..], &b";a=b;c=d"[..]); assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, None).unwrap()); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"gorets"[..]); @@ -454,15 +454,32 @@ mod tests { #[test] fn parse_many_metrics_with_long_tags() { // metric with long tags followed by another metric - let mut data = BytesMut::from(&b"gorets;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b:+1000|g\nbobets;c=d:1000|g"[..]); + let mut data = BytesMut::from(&b"gorets;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b;a=b:+1000|g\n\nbobets;c=d:1000|g\n\n"[..]); let mut parser = make_parser(&mut data); let (name, metric) = parser.next().unwrap(); assert_eq!(&name.name[..], &b"bobets;c=d"[..]); - assert_eq!(name.tag_pos, Some(7usize)); - assert_eq!(&name.name[name.tag_pos.unwrap()..], &b"c=d"[..]); + assert_eq!(name.tag_pos, Some(6usize)); + assert_eq!(&name.name[name.tag_pos.unwrap()..], &b";c=d"[..]); assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(None), None, None).unwrap()); } + #[test] + fn parse_trashed_metric_with_tags() { + let mut data = BytesMut::new(); + data.extend_from_slice(b"trash\ngorets1:+1000|g\nTRASH\n\n\ngorets2;tag3=shit;t2=fuck:-1000|g|@0.5\nMORE;tra=sh;|TrasH\nFUUU"); + let mut parser = make_parser(&mut data); + let (name, metric) = parser.next().unwrap(); + assert_eq!(&name.name[..], &b"gorets1"[..]); + assert_eq!(name.tag_pos, None); + assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, None).unwrap()); + + let (name, metric) = parser.next().unwrap(); + assert_eq!(&name.name[..], &b"gorets2;tag3=shit;t2=fuck"[..]); + assert_eq!(name.tag_pos, Some(7usize)); + assert_eq!(&name.name[name.tag_pos.unwrap()..], &b";tag3=shit;t2=fuck"[..]); + assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap()); + } + #[test] fn parse_split_metric_buf() { let mut data = BytesMut::new(); From f611ba0a12480be25b566d6da6c56f75a7f84592 Mon Sep 17 00:00:00 2001 From: Sergey Noskov Date: Wed, 13 Nov 2019 09:21:03 +0300 Subject: [PATCH 11/23] Fix aggregate equality, refactor calculation --- src/aggregate.rs | 178 ++++++++++++++++++++++++++++++++--------------- src/metric.rs | 13 +--- 2 files changed, 123 insertions(+), 68 deletions(-) diff --git a/src/aggregate.rs b/src/aggregate.rs index e39198c..1db6927 100644 --- a/src/aggregate.rs +++ b/src/aggregate.rs @@ -6,7 +6,7 @@ use std::str::FromStr; use num_traits::{AsPrimitive, Float}; use serde_derive::{Deserialize, Serialize}; -use crate::metric::FromF64; +use crate::metric::{FromF64, Metric, MetricType}; // Percentile counter. Not safe. Requires at least two elements in vector // vector must be sorted @@ -51,6 +51,8 @@ pub enum Aggregate where F: Float + Debug + FromF64 + AsPrimitive, { + #[serde(skip)] + Value, Count, Last, Min, @@ -105,11 +107,29 @@ where { fn hash(&self, state: &mut H) { match self { - Aggregate::Percentile(f) => { - let u: usize = f.as_(); //.rotate_left(5); - u.hash(state) + Aggregate::Value => 0usize.hash(state), + Aggregate::Count => 1usize.hash(state), + Aggregate::Last => 2usize.hash(state), + Aggregate::Min => 3usize.hash(state), + Aggregate::Max => 4usize.hash(state), + Aggregate::Sum => 5usize.hash(state), + Aggregate::Median => 6usize.hash(state), + Aggregate::Mean => 7usize.hash(state), + Aggregate::UpdateCount => 8usize.hash(state), + Aggregate::AggregateTag => 9usize.hash(state), + // we need this for hashing and comparison, so we just use a value different from other + // enum values + // the second thing we need here is correctness, so nobody could send us some strange + // percentile value like inf or nan (maybe there will be no such case, but just for the + // sake of correctness we'd better do this + Aggregate::Percentile(p) if !p.is_finite() => 11usize.hash(state), //std::usize::MAX, + Aggregate::Percentile(p) => { + let (mantissa, exponent, sign) = Float::integer_decode(*p); + 11usize.hash(state); + mantissa.hash(state); + exponent.hash(state); + sign.hash(state); } - p => p.as_usize().hash(state), } } } @@ -123,12 +143,26 @@ where { fn eq(&self, other: &Self) -> bool { match (self, other) { - (Aggregate::Percentile(f), Aggregate::Percentile(o)) => { - let s: usize = f.as_().rotate_left(5); - let o: usize = o.as_().rotate_left(5); - s == o + (Aggregate::Count, Aggregate::Count) => true, + (Aggregate::Last, Aggregate::Last) => true, + (Aggregate::Min, Aggregate::Min) => true, + (Aggregate::Max, Aggregate::Max) => true, + (Aggregate::Sum, Aggregate::Sum) => true, + (Aggregate::Median, Aggregate::Median) => true, + (Aggregate::Mean, Aggregate::Mean) => true, + (Aggregate::UpdateCount, Aggregate::UpdateCount) => true, + (Aggregate::AggregateTag, Aggregate::AggregateTag) => true, + // we need this for hashing and comparison, so we just use a value different from other + // percentile value like inf or nan (maybe there will be no such case, but just for the + // sake of correctness we'd better do this + (Aggregate::Percentile(p), Aggregate::Percentile(o)) => { + if p.is_normal() && o.is_normal() { + Float::integer_decode(*p) == Float::integer_decode(*o) + } else { + p.classify() == o.classify() + } } - (s, o) => s.as_usize() == o.as_usize(), + _ => false, } } } @@ -137,55 +171,87 @@ impl Aggregate where F: Float + Debug + FromF64 + AsPrimitive, { - fn as_usize(&self) -> usize { - match self { - Aggregate::Count => 0, - Aggregate::Last => 1, - Aggregate::Min => 2, - Aggregate::Max => 3, - Aggregate::Sum => 4, - Aggregate::Median => 5, - Aggregate::Mean => 6, - Aggregate::UpdateCount => 7, - Aggregate::AggregateTag => 8, - // we need this for hashing and comparison, so we just use a value different from other - // enum values - // the second thing we need here is correctness, so nobody could send us some strange - // percentile value like inf or nan (maybe there will be no such case, but just for the - // sake of correctness we'd better do this - Aggregate::Percentile(p) if !p.is_finite() => std::usize::MAX, - Aggregate::Percentile(p) if *p > FromF64::from_f64(std::usize::MAX as f64) => std::usize::MAX, - Aggregate::Percentile(p) => (*p + FromF64::from_f64(10f64)).as_(), + /// calculates the corresponding aggregate from the input metric + /// returne None in all inapplicable cases, like zero-length vector or metric type mismatch + /// cached_sum must relate to the same metric between calls, giving incorrect results or + /// panics otherwise + pub fn calculate(&self, metric: &Metric, cached_sum: &mut Option) -> Option { + // TODO: test + match metric.mtype { + // for sets calculate only count + MetricType::Set(ref hs) if self == &Aggregate::Count => Some(F::from_f64(hs.len() as f64)), + // don't count values for timers and sets + MetricType::Set(_) if self == &Aggregate::Value => None, + MetricType::Timer(_) if self == &Aggregate::Value => None, + // for timers calculate all aggregates + MetricType::Timer(ref agg) => match self { + Aggregate::Value => None, + Aggregate::Count => Some(F::from_f64(agg.len() as f64)), + Aggregate::Last => agg.last().copied(), + Aggregate::Min => Some(agg[0]), + Aggregate::Max => Some(agg[agg.len() - 1]), + Aggregate::Sum => { + fill_cached_sum(agg, cached_sum); + cached_sum.map(|sum| sum) + } + Aggregate::Median => Some(percentile(agg, F::from_f64(0.5))), + Aggregate::Mean => { + // the case with len = 0 and sum != None is real here, but we intentinally let it + // panic on division by zero to get incorrect usage from code to be explicit + fill_cached_sum(agg, cached_sum); + cached_sum.map(|sum| { + let len: F = F::from_f64(agg.len() as f64); + sum / len + }) + } + Aggregate::UpdateCount => Some(F::from_f64(f64::from(metric.update_counter))), + Aggregate::AggregateTag => None, + Aggregate::Percentile(ref p) => Some(percentile(agg, *p)), + }, + // cout value for all types except timers (matched above) + _ if self == &Aggregate::Value => Some(metric.value), + // for other types calculate only update counter + _ if self == &Aggregate::UpdateCount => Some(F::from_f64(f64::from(metric.update_counter))), + _ => None, } } - /// calculates the corresponding aggregate from the input vector - /// may return none if length of agg if data is required for aggregate to be count - /// agg and cached_sum must relate to the same metric between calls, giving incorrect results or - /// panics otherwise - pub fn calculate(&self, agg: &[F], cached_sum: &mut Option, update_count: f64) -> Option { - match self { - Aggregate::Count => Some(F::from_f64(agg.len() as f64)), - Aggregate::Last => agg.last().copied(), - Aggregate::Min => Some(agg[0]), - Aggregate::Max => Some(agg[agg.len() - 1]), - Aggregate::Sum => { - fill_cached_sum(agg, cached_sum); - cached_sum.map(|sum| sum) - } - Aggregate::Median => Some(percentile(agg, F::from_f64(0.5))), - Aggregate::Mean => { - // the case with len = 0 and sum != None is real here, but we intentinally let it - // panic on division by zero to get incorrect usage from code to be explicit - fill_cached_sum(agg, cached_sum); - cached_sum.map(|sum| { - let len: F = F::from_f64(agg.len() as f64); - sum / len - }) - } +} + +#[cfg(test)] +mod tests { + use super::*; + + use std::collections::HashMap; + + #[test] + fn aggregates_eq_and_hashing_f32() { + let c32: Aggregate = Aggregate::Count; + assert!(c32 != Aggregate::Min); + + assert!(Aggregate::Percentile(0.75f32) != Aggregate::Percentile(0.999)); + + let mut hm = HashMap::new(); + // ensure hashing works good for typical percentiles up to 99999 + for p in 1..100_000u32 { + hm.insert(Aggregate::Percentile(1f32 / p as f32), ()); + hm.insert(Aggregate::Percentile(1f32 / p as f32), ()); + } + assert_eq!(hm.len(), 100000 - 1); + } + + #[test] + fn aggregates_eq_and_hashing_f64() { + let c64: Aggregate = Aggregate::Count; + assert!(c64 != Aggregate::Min); + + assert!(Aggregate::Percentile(0.75f64) != Aggregate::Percentile(0.999f64)); - Aggregate::UpdateCount => Some(F::from_f64(update_count)), - Aggregate::AggregateTag => None, - Aggregate::Percentile(ref p) => Some(percentile(agg, *p)), + let mut hm = HashMap::new(); + // ensure hashing works good for typical percentiles up to 99999 + for p in 1..100_000u32 { + hm.insert(Aggregate::Percentile(1f64 / f64::from(p)), ()); + hm.insert(Aggregate::Percentile(1f64 / f64::from(p)), ()); } + assert_eq!(hm.len(), 100000 - 1); } } diff --git a/src/metric.rs b/src/metric.rs index c7028ac..fbc35f5 100644 --- a/src/metric.rs +++ b/src/metric.rs @@ -136,18 +136,6 @@ where Ok(()) } - /// aggregates matric with apecified aggregate - pub fn aggregate(&self, agg: &Aggregate, cached_sum: &mut Option) -> Option - where - F: Float + Debug + AsPrimitive + AsPrimitive + FromF64 + Sync, - { - match self.mtype { - MetricType::Timer(ref values) => agg.calculate(values, cached_sum, f64::from(self.update_counter)), - _ if agg == &Aggregate::UpdateCount => agg.calculate(&Vec::new(), cached_sum, f64::from(self.update_counter)), - _ => None, - } - } - pub fn from_capnp(reader: cmetric::Reader) -> Result<(MetricName, Metric), MetricError> { let name = reader.get_name().map_err(MetricError::Capnp)?.into(); let mut name = MetricName::new(name, None); @@ -279,6 +267,7 @@ where } } +//#[deprecated(since = "0.1.0", note = "iterator only iterates over non-customized metrics, use aggregate method")] pub struct MetricIter where F: Float + Debug + FromF64 + AsPrimitive, From f2bde6c0123c207f9fc60b631e90c9e113882af0 Mon Sep 17 00:00:00 2001 From: Sergey Noskov Date: Wed, 13 Nov 2019 09:23:52 +0300 Subject: [PATCH 12/23] Add separate naming replacement --- src/name.rs | 135 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 86 insertions(+), 49 deletions(-) diff --git a/src/name.rs b/src/name.rs index 9798dde..dafbc1c 100644 --- a/src/name.rs +++ b/src/name.rs @@ -135,49 +135,54 @@ impl MetricName { buf.put_slice(tag); } - //fn put_with_value_tag(&self, buf: &mut BytesMut, tag: &[u8], value: f64) { - // let namelen = self.name.len(); - - //FIXME - /* - let mut addlen = namelen + agglen + 1 + 32; // 1 is for `=`, 32 is for the float value itself - if !semicolon { - addlen += 1 // add one more for `;` - }; - buf.reserve(addlen); - buf.put_slice(&self.name); - if !semicolon { - buf.put(b';'); - } - buf.put_slice(agg_name); - buf.put(b'='); - buf.put_slice(&self.name[pos..]); - */ - //} - /// puts a name with an aggregate to provided buffer depending on dest /// expects tag_pos to be already found - pub fn put_with_aggregate(&self, buf: &mut BytesMut, dest: AggregationDestination, agg: Aggregate, replacements: &HashMap, String>) -> Result<(), ()> + pub fn put_with_aggregate( + // rustfmt + &self, + buf: &mut BytesMut, + dest: AggregationDestination, + agg: &Aggregate, + postfix_replacements: &HashMap, String>, + tag_replacements: &HashMap, String>, + ) -> Result<(), ()> where F: Float + Debug + FromF64 + AsPrimitive, { - let agg_name = replacements.get(&agg).ok_or(())?.as_bytes(); - let agg_tag = replacements.get(&Aggregate::AggregateTag).ok_or(())?.as_bytes(); + // we should not use let agg_postfix before the match, because with the tag case we don't + // need it + // the same applies to tag name and value: we only need them when aggregating to tags match dest { AggregationDestination::Smart if self.tag_pos.is_none() => { + let agg_postfix = postfix_replacements.get(agg).ok_or(())?.as_bytes(); // metric is untagged, add aggregate to name - Ok(self.put_with_suffix(buf, agg_name)) + Ok(self.put_with_suffix(buf, agg_postfix)) } AggregationDestination::Smart => { + let agg_tag_name = tag_replacements.get(&Aggregate::AggregateTag).ok_or(())?.as_bytes(); + let agg_tag_value = tag_replacements.get(agg).ok_or(())?.as_bytes(); + // metric is tagged, add aggregate as tag - Ok(self.put_with_fixed_tag(buf, agg_tag, agg_name, false)) + Ok(self.put_with_fixed_tag(buf, agg_tag_name, agg_tag_value, false)) + } + AggregationDestination::Name => { + let agg_postfix = postfix_replacements.get(agg).ok_or(())?.as_bytes(); + Ok(self.put_with_suffix(buf, agg_postfix)) + } + AggregationDestination::Tag => { + let agg_tag_name = tag_replacements.get(&Aggregate::AggregateTag).ok_or(())?.as_bytes(); + let agg_tag_value = tag_replacements.get(agg).ok_or(())?.as_bytes(); + + Ok(self.put_with_fixed_tag(buf, agg_tag_name, agg_tag_value, false)) } - AggregationDestination::Name => Ok(self.put_with_suffix(buf, agg_name)), - AggregationDestination::Tag => Ok(self.put_with_fixed_tag(buf, agg_tag, agg_name, false)), AggregationDestination::Both => { - self.put_with_suffix(buf, agg_name); - self.put_with_fixed_tag(buf, agg_tag, agg_name, true); + let agg_postfix = postfix_replacements.get(agg).ok_or(())?.as_bytes(); + let agg_tag_name = tag_replacements.get(&Aggregate::AggregateTag).ok_or(())?.as_bytes(); + let agg_tag_value = tag_replacements.get(agg).ok_or(())?.as_bytes(); + + self.put_with_suffix(buf, agg_postfix); + self.put_with_fixed_tag(buf, agg_tag_name, agg_tag_value, true); Ok(()) } } @@ -285,9 +290,15 @@ mod tests { #[test] fn metric_aggregate_modes() { // create some replacements - let mut replacements: HashMap, String> = HashMap::new(); - replacements.insert(Aggregate::Count, "count".to_string()); - replacements.insert(Aggregate::AggregateTag, "aggregate".to_string()); + let mut p_reps: HashMap, String> = HashMap::new(); + p_reps.insert(Aggregate::Count, "count".to_string()); + p_reps.insert(Aggregate::AggregateTag, "MUST NOT BE USED".to_string()); + p_reps.insert(Aggregate::Percentile(0.8f64), "percentile80".to_string()); + + let mut t_reps: HashMap, String> = HashMap::new(); + t_reps.insert(Aggregate::Count, "cnt".to_string()); + t_reps.insert(Aggregate::AggregateTag, "agg".to_string()); + // intentionally skip adding percentile80 let mut without_tags = MetricName::new(BytesMut::from(&b"gorets.bobez"[..]), None); let mut with_tags = MetricName::new(BytesMut::from(&b"gorets.bobez;tag=value"[..]), None); @@ -299,36 +310,62 @@ mod tests { // create 0-size buffer to make sure allocation counts work as intended let mut buf = BytesMut::new(); - assert!(without_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, Aggregate::Max, &replacements).is_err()); + // --------- without_tags - without_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, Aggregate::Count, &replacements).unwrap(); + // max is not in replacements + assert!(without_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, &Aggregate::Max, &p_reps, &t_reps).is_err(), "non existing replacement gave no error"); + + without_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, &Aggregate::Count, &p_reps, &t_reps).unwrap(); assert_eq!(&buf.take()[..], &b"gorets.bobez.count"[..]); - without_tags.put_with_aggregate(&mut buf, AggregationDestination::Name, Aggregate::Count, &replacements).unwrap(); + without_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, &Aggregate::Percentile(0.8f64), &p_reps, &t_reps).unwrap(); + assert_eq!(&buf.take()[..], &b"gorets.bobez.percentile80"[..], "existing postfix replacement was not put"); + + without_tags.put_with_aggregate(&mut buf, AggregationDestination::Name, &Aggregate::Count, &p_reps, &t_reps).unwrap(); assert_eq!(&buf.take()[..], &b"gorets.bobez.count"[..]); - without_tags.put_with_aggregate(&mut buf, AggregationDestination::Tag, Aggregate::Count, &replacements).unwrap(); - assert_eq!(&buf.take()[..], &b"gorets.bobez;aggregate=count"[..]); + without_tags.put_with_aggregate(&mut buf, AggregationDestination::Name, &Aggregate::Percentile(0.8f64), &p_reps, &t_reps).unwrap(); + assert_eq!(&buf.take()[..], &b"gorets.bobez.percentile80"[..], "existing postfix replacement was not put"); + + without_tags.put_with_aggregate(&mut buf, AggregationDestination::Tag, &Aggregate::Count, &p_reps, &t_reps).unwrap(); + assert_eq!(&buf.take()[..], &b"gorets.bobez;agg=cnt"[..]); + + let err = without_tags.put_with_aggregate(&mut buf, AggregationDestination::Tag, &Aggregate::Percentile(0.8f64), &p_reps, &t_reps); + assert_eq!(err, Err(()), "p80 aggregated into tags whilt it should not"); + + without_tags.put_with_aggregate(&mut buf, AggregationDestination::Both, &Aggregate::Count, &p_reps, &t_reps).unwrap(); + assert_eq!(&buf.take()[..], &b"gorets.bobez.count;agg=cnt"[..]); - without_tags.put_with_aggregate(&mut buf, AggregationDestination::Both, Aggregate::Count, &replacements).unwrap(); - dbg!(&buf); - assert_eq!(&buf.take()[..], &b"gorets.bobez.count;aggregate=count"[..]); + let err = without_tags.put_with_aggregate(&mut buf, AggregationDestination::Both, &Aggregate::Percentile(0.8f64), &p_reps, &t_reps); + assert_eq!(err, Err(()), "p80 aggregated into tags whilt it should not"); - with_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, Aggregate::Count, &replacements).unwrap(); - assert_eq!(&buf.take()[..], &b"gorets.bobez;tag=value;aggregate=count"[..]); + // --------- with_tags + assert!(with_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, &Aggregate::Max, &p_reps, &t_reps).is_err()); - with_tags.put_with_aggregate(&mut buf, AggregationDestination::Name, Aggregate::Count, &replacements).unwrap(); + with_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, &Aggregate::Count, &p_reps, &t_reps).unwrap(); + assert_eq!(&buf.take()[..], &b"gorets.bobez;tag=value;agg=cnt"[..]); + + let err = with_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, &Aggregate::Percentile(0.8f64), &p_reps, &t_reps); + assert_eq!(err, Err(()), "p80 aggregated into tags whilt it should not"); + + with_tags.put_with_aggregate(&mut buf, AggregationDestination::Name, &Aggregate::Count, &p_reps, &t_reps).unwrap(); assert_eq!(&buf.take()[..], &b"gorets.bobez.count;tag=value"[..]); - with_tags.put_with_aggregate(&mut buf, AggregationDestination::Tag, Aggregate::Count, &replacements).unwrap(); - assert_eq!(&buf.take()[..], &b"gorets.bobez;tag=value;aggregate=count"[..]); + with_tags.put_with_aggregate(&mut buf, AggregationDestination::Tag, &Aggregate::Count, &p_reps, &t_reps).unwrap(); + assert_eq!(&buf.take()[..], &b"gorets.bobez;tag=value;agg=cnt"[..]); + + let err = with_tags.put_with_aggregate(&mut buf, AggregationDestination::Tag, &Aggregate::Percentile(0.8f64), &p_reps, &t_reps); + assert_eq!(err, Err(()), "p80 aggregated into tags whilt it should not"); + + with_tags.put_with_aggregate(&mut buf, AggregationDestination::Both, &Aggregate::Count, &p_reps, &t_reps).unwrap(); + assert_eq!(&buf.take()[..], &b"gorets.bobez.count;tag=value;agg=cnt"[..]); - with_tags.put_with_aggregate(&mut buf, AggregationDestination::Both, Aggregate::Count, &replacements).unwrap(); - assert_eq!(&buf.take()[..], &b"gorets.bobez.count;tag=value;aggregate=count"[..]); + let err = with_tags.put_with_aggregate(&mut buf, AggregationDestination::Both, &Aggregate::Percentile(0.8f64), &p_reps, &t_reps); + assert_eq!(err, Err(()), "p80 aggregated into tags whilt it should not"); // ensure trailing semicolon is not duplicated - with_semicolon.put_with_aggregate(&mut buf, AggregationDestination::Smart, Aggregate::Count, &replacements).unwrap(); - assert_eq!(&buf.take()[..], &b"gorets.bobez;aggregate=count"[..]); + with_semicolon.put_with_aggregate(&mut buf, AggregationDestination::Smart, &Aggregate::Count, &p_reps, &t_reps).unwrap(); + assert_eq!(&buf.take()[..], &b"gorets.bobez;agg=cnt"[..]); } #[test] From 9e6e033807f7b027e2ae3343885adf219de1ef4f Mon Sep 17 00:00:00 2001 From: Sergey Noskov Date: Wed, 13 Nov 2019 10:10:12 +0300 Subject: [PATCH 13/23] Kebab case for aggregation destination --- src/name.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/name.rs b/src/name.rs index dafbc1c..78d4f2e 100644 --- a/src/name.rs +++ b/src/name.rs @@ -22,6 +22,7 @@ pub enum TagFormat { } #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case", deny_unknown_fields)] pub enum AggregationDestination { /// place depending on tags existence, if they do - always place in tag Smart, From 874a53e80e52776318a68665422905d473db9855 Mon Sep 17 00:00:00 2001 From: Sergey Noskov Date: Tue, 26 Nov 2019 10:06:20 +0300 Subject: [PATCH 14/23] Add forgotten mean aggregate parsing --- src/aggregate.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aggregate.rs b/src/aggregate.rs index 1db6927..2d1a863 100644 --- a/src/aggregate.rs +++ b/src/aggregate.rs @@ -80,6 +80,7 @@ where "max" => Ok(Aggregate::Max), "sum" => Ok(Aggregate::Sum), "median" => Ok(Aggregate::Median), + "mean" => Ok(Aggregate::Mean), "updates" => Ok(Aggregate::UpdateCount), s if s.starts_with("percentile-") => { // check in match guarantees minus char exists @@ -176,7 +177,6 @@ where /// cached_sum must relate to the same metric between calls, giving incorrect results or /// panics otherwise pub fn calculate(&self, metric: &Metric, cached_sum: &mut Option) -> Option { - // TODO: test match metric.mtype { // for sets calculate only count MetricType::Set(ref hs) if self == &Aggregate::Count => Some(F::from_f64(hs.len() as f64)), From 5b6784be56d1f7eed5112515f9c39f6f8202d90f Mon Sep 17 00:00:00 2001 From: Sergey Noskov Date: Tue, 26 Nov 2019 10:06:57 +0300 Subject: [PATCH 15/23] Small useful changes --- src/lib.rs | 1 + src/name.rs | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 8ab2711..eb91ea2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ pub mod name; pub mod parser; pub use crate::metric::*; +pub use crate::name::MetricName; pub mod protocol_capnp { include!(concat!(env!("OUT_DIR"), "/schema/protocol_capnp.rs")); diff --git a/src/name.rs b/src/name.rs index 78d4f2e..ca8fbd5 100644 --- a/src/name.rs +++ b/src/name.rs @@ -59,6 +59,11 @@ impl MetricName { self.tag_pos.is_some() } + /// true if metric is tagged + pub fn has_tags(&mut self) -> bool { + self.tag_pos.is_some() + } + /// returns only name, without tags, considers tag position was already found before pub fn name_without_tags(&self) -> &[u8] { if let Some(pos) = self.tag_pos { From 308b277547b428cdec61c9a40bee6fbb7dec4e58 Mon Sep 17 00:00:00 2001 From: Sergey Noskov Date: Thu, 28 Nov 2019 08:46:12 +0300 Subject: [PATCH 16/23] Add aggregation iterator and unit test --- src/aggregate.rs | 214 +++++++++++++++++++++++++++++++++++++++++++++-- src/metric.rs | 91 ++------------------ 2 files changed, 214 insertions(+), 91 deletions(-) diff --git a/src/aggregate.rs b/src/aggregate.rs index 2d1a863..af96069 100644 --- a/src/aggregate.rs +++ b/src/aggregate.rs @@ -144,6 +144,7 @@ where { fn eq(&self, other: &Self) -> bool { match (self, other) { + (Aggregate::Value, Aggregate::Value) => true, (Aggregate::Count, Aggregate::Count) => true, (Aggregate::Last, Aggregate::Last) => true, (Aggregate::Min, Aggregate::Min) => true, @@ -177,14 +178,14 @@ where /// cached_sum must relate to the same metric between calls, giving incorrect results or /// panics otherwise pub fn calculate(&self, metric: &Metric, cached_sum: &mut Option) -> Option { - match metric.mtype { + match (&metric.mtype, self) { // for sets calculate only count - MetricType::Set(ref hs) if self == &Aggregate::Count => Some(F::from_f64(hs.len() as f64)), + (MetricType::Set(ref hs), &Aggregate::Count) => Some(F::from_f64(hs.len() as f64)), // don't count values for timers and sets - MetricType::Set(_) if self == &Aggregate::Value => None, - MetricType::Timer(_) if self == &Aggregate::Value => None, + (MetricType::Set(_), &Aggregate::Value) => None, + (MetricType::Timer(_), &Aggregate::Value) => None, // for timers calculate all aggregates - MetricType::Timer(ref agg) => match self { + (MetricType::Timer(ref agg), s) => match s { Aggregate::Value => None, Aggregate::Count => Some(F::from_f64(agg.len() as f64)), Aggregate::Last => agg.last().copied(), @@ -209,19 +210,66 @@ where Aggregate::Percentile(ref p) => Some(percentile(agg, *p)), }, // cout value for all types except timers (matched above) - _ if self == &Aggregate::Value => Some(metric.value), + (_, &Aggregate::Value) => Some(metric.value), // for other types calculate only update counter - _ if self == &Aggregate::UpdateCount => Some(F::from_f64(f64::from(metric.update_counter))), + (_, &Aggregate::UpdateCount) => Some(F::from_f64(f64::from(metric.update_counter))), _ => None, } } } +/// A state for calculating all aggregates over metric +/// Implements iterator returning the index of aggregate in the input and the aggregate value +/// if such value should exist for an aggregate +pub struct AggregateCalculator<'a, F> +where + F: Float + Debug + FromF64 + AsPrimitive, +{ + metric: &'a Metric, + timer_sum: Option, + aggregates: &'a [Aggregate], + current: usize, +} + +impl<'a, F> AggregateCalculator<'a, F> +where + F: Float + Debug + FromF64 + AsPrimitive, +{ + pub fn new(metric: &'a mut Metric, aggregates: &'a [Aggregate]) -> Self { + let timer_sum = if let MetricType::Timer(ref mut agg) = metric.mtype { + agg.sort_unstable_by(|ref v1, ref v2| v1.partial_cmp(v2).unwrap()); + let first = agg.first().unwrap(); + Some(agg.iter().skip(1).fold(*first, |acc, &v| acc + v)) + } else { + None + }; + Self { metric, timer_sum, aggregates, current: 0 } + } +} + +impl<'a, F> Iterator for AggregateCalculator<'a, F> +where + F: Float + Debug + FromF64 + AsPrimitive, +{ + type Item = Option<(usize, F)>; + + fn next(&mut self) -> Option { + if self.current >= self.aggregates.len() { + return None; + } + + let agg = &self.aggregates[self.current]; + let calc = agg.calculate(self.metric, &mut self.timer_sum).map(|result| (self.current, result)); + self.current += 1; + Some(calc) + } +} + #[cfg(test)] mod tests { use super::*; - use std::collections::HashMap; + use std::collections::{HashMap, HashSet}; #[test] fn aggregates_eq_and_hashing_f32() { @@ -254,4 +302,154 @@ mod tests { } assert_eq!(hm.len(), 100000 - 1); } + + #[test] + fn aggregating_with_iterator() { + // create aggregates list + // this also tests all aggregates are parsed + let aggregates = vec!["count", "min", "updates", "max", "sum", "median", "percentile-85"]; + let mut aggregates = aggregates.into_iter().map(|s| Aggregate::try_from(s.to_string()).unwrap()).collect::>>(); + // add hidden value aggregator + aggregates.push(Aggregate::Value); + + let samples = vec![12f64, 43f64, 1f64, 9f64, 84f64, 55f64, 31f64, 16f64, 64f64]; + let mut expected = HashMap::new(); + + let mut metrics = Vec::new(); + aggregates.iter().map(|agg| expected.insert(agg.clone(), Vec::new())).last(); + // NOTE: when modifying this test, push expected aggregates to vector in the same order as `aggregates` vector + // (this is needed for equality test to work as intended) + + // TODO: gauge signs + let mut gauge = Metric::new(1f64, MetricType::Gauge(None), None, None).unwrap(); + samples + .iter() + .map(|t| { + let gauge2 = Metric::new(*t, MetricType::Gauge(None), None, None).unwrap(); + gauge.accumulate(gauge2).unwrap(); + }) + .last(); + + metrics.push(gauge.clone()); + + // gauges only consider last value + expected.get_mut(&Aggregate::Value).unwrap().push((gauge.clone(), 64f64)); + expected.get_mut(&Aggregate::UpdateCount).unwrap().push((gauge.clone(), 10f64)); + + // counters must be aggregated into two aggregates: value and update counter + let mut counter = Metric::new(1f64, MetricType::Counter, None, None).unwrap(); + let mut sign = 1f64; + samples + .iter() + .map(|t| { + sign = -sign; + let counter2 = Metric::new(*t * sign, MetricType::Counter, None, None).unwrap(); + counter.accumulate(counter2).unwrap(); + }) + .last(); + + metrics.push(counter.clone()); + + // aggregated value for counter is a sum of all incoming data considering signs + expected.get_mut(&Aggregate::Value).unwrap().push((counter.clone(), -68f64)); + expected.get_mut(&Aggregate::UpdateCount).unwrap().push((counter.clone(), 10f64)); + + // diff counter is when you send a counter value as is, but it's considered increasing and + // only positive diff adds up + let mut dcounter = Metric::new(1f64, MetricType::DiffCounter(0.0), None, Some(0.1)).unwrap(); + samples + .iter() + .map(|t| { + let dcounter2 = Metric::new(*t, MetricType::DiffCounter(0.0), None, Some(0.1)).unwrap(); + dcounter.accumulate(dcounter2).unwrap(); + }) + .last(); + + metrics.push(dcounter.clone()); + + expected.get_mut(&Aggregate::Value).unwrap().push((dcounter.clone(), 278f64)); + expected.get_mut(&Aggregate::UpdateCount).unwrap().push((dcounter.clone(), 10f64)); + + // timers should be properly aggregated into all agregates except value + let mut timer = Metric::new(1f64, MetricType::Timer(Vec::new()), None, None).unwrap(); + + samples + .iter() + .map(|t| { + let timer2 = Metric::new(*t, MetricType::Timer(Vec::new()), None, None).unwrap(); + timer.accumulate(timer2).unwrap(); + }) + .last(); + + expected.get_mut(&Aggregate::Count).unwrap().push((timer.clone(), 10f64)); + expected.get_mut(&Aggregate::Min).unwrap().push((timer.clone(), 1f64)); + expected.get_mut(&Aggregate::UpdateCount).unwrap().push((timer.clone(), 10f64)); + expected.get_mut(&Aggregate::Max).unwrap().push((timer.clone(), 84f64)); + expected.get_mut(&Aggregate::Sum).unwrap().push((timer.clone(), 316f64)); + // percentiles( 0.5 and 0.85 ) was counted in google spreadsheets + expected.get_mut(&Aggregate::Median).unwrap().push((timer.clone(), 23.5f64)); + expected.get_mut(&Aggregate::Percentile(0.85)).unwrap().push((timer.clone(), 60.85f64)); + metrics.push(timer); + + // sets should be properly aggregated into count of uniques and update count + let mut set = Metric::new(1f64, MetricType::Set(HashSet::new()), None, None).unwrap(); + + samples + .iter() + .map(|t| { + let set2 = Metric::new(*t, MetricType::Set(HashSet::new()), None, None).unwrap(); + set.accumulate(set2).unwrap(); + }) + .last(); + + expected.get_mut(&Aggregate::Count).unwrap().push((set.clone(), 9f64)); + expected.get_mut(&Aggregate::UpdateCount).unwrap().push((set.clone(), 10f64)); + // percentiles( 0.5 and 0.85 ) was counted in google spreadsheets + metrics.push(set); + + let mut results = HashMap::new(); + metrics + .into_iter() + .map(|metric| { + let mut calc_metric = metric.clone(); + let calculator = AggregateCalculator::new(&mut calc_metric, &aggregates); + calculator + // .inspect(|res| { + //dbg!(res); + //}) + // count all of them that are countable (filtering None) and leaving the aggregate itself + .filter_map(|result| result) + // set corresponding name + .map(|(idx, value)| { + // it would be better to use metric as a key, but it doesn't implement Eq, + // so atm we trick it this way + if !results.contains_key(&aggregates[idx]) { + results.insert(aggregates[idx].clone(), Vec::new()); + } + results.get_mut(&aggregates[idx]).unwrap().push((metric.clone(), value)); + }) + .last(); + }) + .last(); + + //dbg!(&expected, &results); + assert_eq!(expected.len(), results.len(), "expected len does not match results len"); + for (ec, ev) in &expected { + //dbg!(ec); + let rv = results.get(ec).expect("expected key not found in results"); + assert_eq!(ev.len(), rv.len()); + // for percentile a value can be a bit not equal + if ec == &Aggregate::Percentile(0.85f64) { + ev.iter() + .zip(rv.iter()) + .map(|((_, e), (_, r))| { + let diff = (e - r).abs(); + assert!(diff < 0.0001, format!("{} ~= {}: {}", e, r, diff)); + }) + .last(); + } else { + assert_eq!(ev, rv); + } + } + } } diff --git a/src/metric.rs b/src/metric.rs index fbc35f5..8b4399d 100644 --- a/src/metric.rs +++ b/src/metric.rs @@ -7,7 +7,6 @@ use failure::Error; use failure_derive::Fail; use serde_derive::{Deserialize, Serialize}; -use crate::aggregate::{percentile, Aggregate}; use crate::name::MetricName; use crate::protocol_capnp::{gauge, metric as cmetric, metric_type}; @@ -76,6 +75,14 @@ impl FromF64 for f32 { sign_f * mantissa_f * exponent_f } } +// TODO +//impl Eq for Metric +//F: PartialEq, +//{ +//fn eq(&self, other: &Self) -> bool { +////self.self.value == other.value +//} +//} impl Metric where @@ -256,88 +263,6 @@ where } } -impl IntoIterator for Metric -where - F: Float + Debug + FromF64 + AsPrimitive, -{ - type Item = (Option>, F); - type IntoIter = MetricIter; - fn into_iter(self) -> Self::IntoIter { - MetricIter::new(self) - } -} - -//#[deprecated(since = "0.1.0", note = "iterator only iterates over non-customized metrics, use aggregate method")] -pub struct MetricIter -where - F: Float + Debug + FromF64 + AsPrimitive, -{ - m: Metric, - count: usize, - // cached sum to avoid recounting - timer_sum: Option, -} - -impl MetricIter -where - F: Float + Debug + FromF64 + AsPrimitive, -{ - fn new(mut metric: Metric) -> Self { - let sum = if let MetricType::Timer(ref mut agg) = metric.mtype { - agg.sort_unstable_by(|ref v1, ref v2| v1.partial_cmp(v2).unwrap()); - let first = agg.first().unwrap(); - Some(agg.iter().skip(1).fold(*first, |acc, &v| acc + v)) - } else { - None - }; - MetricIter { m: metric, count: 0, timer_sum: sum } - } -} - -impl Iterator for MetricIter -where - F: Float + Debug + FromF64 + AsPrimitive, -{ - type Item = (Option>, F); - - fn next(&mut self) -> Option { - let res: Option = match self.m.mtype { - MetricType::Counter if self.count == 0 => Some((None, self.m.value)), - MetricType::DiffCounter(_) if self.count == 0 => Some((None, self.m.value)), - MetricType::Gauge(_) if self.count == 0 => Some((None, self.m.value)), - MetricType::Timer(ref agg) => { - match self.count { - 0 => Some((Some(Aggregate::Count), F::from_f64(agg.len() as f64))), - // agg.len() = 0 is impossible here because of metric creation logic. - // For additional panic safety and to ensure unwrapping is safe here - // this will return None interrupting the iteration and making other - // aggregations unreachable since they are useless in that case - 1 => agg.last().map(|last| (Some(Aggregate::Last), (*last))), - 2 => Some((Some(Aggregate::Min), agg[0])), - 3 => Some((Some(Aggregate::Max), agg[agg.len() - 1])), - 4 => Some((Some(Aggregate::Sum), self.timer_sum.unwrap())), - 5 => Some((Some(Aggregate::Median), percentile(agg, F::from_f64(0.5)))), - 6 => { - let len: F = F::from_f64(agg.len() as f64); - Some((Some(Aggregate::Mean), self.timer_sum.unwrap() / len)) - } - 7 => Some((Some(Aggregate::UpdateCount), F::from_f64(f64::from(self.m.update_counter)))), - 8 => Some((Some(Aggregate::Percentile(F::from_f64(0.75))), percentile(agg, F::from_f64(0.75)))), - 9 => Some((Some(Aggregate::Percentile(F::from_f64(0.95))), percentile(agg, F::from_f64(0.95)))), - 10 => Some((Some(Aggregate::Percentile(F::from_f64(0.98))), percentile(agg, F::from_f64(0.98)))), - 11 => Some((Some(Aggregate::Percentile(F::from_f64(0.99))), percentile(agg, F::from_f64(0.99)))), - 12 => Some((Some(Aggregate::Percentile(F::from_f64(0.999))), percentile(agg, F::from_f64(0.999)))), - _ => None, - } - } - MetricType::Set(ref hs) if self.count == 0 => Some((None, F::from_f64(hs.len() as f64))), - _ => None, - }; - self.count += 1; - res - } -} - #[cfg(test)] mod tests { From ff6779768efaeb976f50c7caa4a87fdad0e798c3 Mon Sep 17 00:00:00 2001 From: Sergey Noskov Date: Thu, 28 Nov 2019 08:46:43 +0300 Subject: [PATCH 17/23] Correct value aggregation naming --- src/name.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/name.rs b/src/name.rs index ca8fbd5..ee82c5c 100644 --- a/src/name.rs +++ b/src/name.rs @@ -155,6 +155,14 @@ impl MetricName { where F: Float + Debug + FromF64 + AsPrimitive, { + // for value aggregate ignore replacements and other shit + if agg == &Aggregate::Value { + let namelen = self.name.len(); + buf.reserve(namelen); + buf.put_slice(&self.name); + return Ok(()); + } + // we should not use let agg_postfix before the match, because with the tag case we don't // need it // the same applies to tag name and value: we only need them when aggregating to tags @@ -321,6 +329,9 @@ mod tests { // max is not in replacements assert!(without_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, &Aggregate::Max, &p_reps, &t_reps).is_err(), "non existing replacement gave no error"); + without_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, &Aggregate::Value, &p_reps, &t_reps).unwrap(); + assert_eq!(&buf.take()[..], &b"gorets.bobez"[..]); + without_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, &Aggregate::Count, &p_reps, &t_reps).unwrap(); assert_eq!(&buf.take()[..], &b"gorets.bobez.count"[..]); @@ -348,6 +359,9 @@ mod tests { // --------- with_tags assert!(with_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, &Aggregate::Max, &p_reps, &t_reps).is_err()); + with_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, &Aggregate::Value, &p_reps, &t_reps).unwrap(); + assert_eq!(&buf.take()[..], &b"gorets.bobez;tag=value"[..]); + with_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, &Aggregate::Count, &p_reps, &t_reps).unwrap(); assert_eq!(&buf.take()[..], &b"gorets.bobez;tag=value;agg=cnt"[..]); @@ -370,6 +384,9 @@ mod tests { assert_eq!(err, Err(()), "p80 aggregated into tags whilt it should not"); // ensure trailing semicolon is not duplicated + with_semicolon.put_with_aggregate(&mut buf, AggregationDestination::Smart, &Aggregate::Value, &p_reps, &t_reps).unwrap(); + assert_eq!(&buf.take()[..], &b"gorets.bobez;"[..]); + with_semicolon.put_with_aggregate(&mut buf, AggregationDestination::Smart, &Aggregate::Count, &p_reps, &t_reps).unwrap(); assert_eq!(&buf.take()[..], &b"gorets.bobez;agg=cnt"[..]); } From c64df609bb872761156a5fe8a35a05a8ef379eae Mon Sep 17 00:00:00 2001 From: Sergey Noskov Date: Fri, 13 Dec 2019 07:57:31 +0300 Subject: [PATCH 18/23] Refactor MatricName for more correct handling --- src/aggregate.rs | 18 +- src/metric.rs | 41 ++- src/name.rs | 655 +++++++++++++++++++++++++++++++++++------------ src/parser.rs | 64 ++++- 4 files changed, 589 insertions(+), 189 deletions(-) diff --git a/src/aggregate.rs b/src/aggregate.rs index af96069..8004112 100644 --- a/src/aggregate.rs +++ b/src/aggregate.rs @@ -40,7 +40,11 @@ where F: Float, { if sum.is_none() { - let first = if let Some(first) = agg.first() { first } else { return }; + let first = if let Some(first) = agg.first() { + first + } else { + return; + }; *sum = Some(agg.iter().skip(1).fold(*first, |acc, &v| acc + v)) } } @@ -243,7 +247,12 @@ where } else { None }; - Self { metric, timer_sum, aggregates, current: 0 } + Self { + metric, + timer_sum, + aggregates, + current: 0, + } } } @@ -308,7 +317,10 @@ mod tests { // create aggregates list // this also tests all aggregates are parsed let aggregates = vec!["count", "min", "updates", "max", "sum", "median", "percentile-85"]; - let mut aggregates = aggregates.into_iter().map(|s| Aggregate::try_from(s.to_string()).unwrap()).collect::>>(); + let mut aggregates = aggregates + .into_iter() + .map(|s| Aggregate::try_from(s.to_string()).unwrap()) + .collect::>>(); // add hidden value aggregator aggregates.push(Aggregate::Value); diff --git a/src/metric.rs b/src/metric.rs index 8b4399d..0a3841b 100644 --- a/src/metric.rs +++ b/src/metric.rs @@ -1,13 +1,14 @@ use std::collections::HashSet; use std::fmt::Debug; +use bytes::Bytes; use capnp; use capnp::message::{Allocator, Builder, HeapAllocator}; use failure::Error; use failure_derive::Fail; use serde_derive::{Deserialize, Serialize}; -use crate::name::MetricName; +use crate::name::{find_tag_pos, MetricName, TagFormat}; use crate::protocol_capnp::{gauge, metric as cmetric, metric_type}; use num_traits::{AsPrimitive, Float}; @@ -75,6 +76,7 @@ impl FromF64 for f32 { sign_f * mantissa_f * exponent_f } } + // TODO //impl Eq for Metric //F: PartialEq, @@ -89,7 +91,13 @@ where F: Float + Debug + AsPrimitive + FromF64 + Sync, { pub fn new(value: F, mtype: MetricType, timestamp: Option, sampling: Option) -> Result { - let mut metric = Metric { value, mtype, timestamp, sampling, update_counter: 1 }; + let mut metric = Metric { + value, + mtype, + timestamp, + sampling, + update_counter: 1, + }; if let MetricType::Timer(ref mut agg) = metric.mtype { agg.push(metric.value) @@ -144,9 +152,9 @@ where } pub fn from_capnp(reader: cmetric::Reader) -> Result<(MetricName, Metric), MetricError> { - let name = reader.get_name().map_err(MetricError::Capnp)?.into(); - let mut name = MetricName::new(name, None); - name.find_tag_pos(true); + let name: Bytes = reader.get_name().map_err(MetricError::Capnp)?.into(); + let tag_pos = find_tag_pos(&name[..], TagFormat::Graphite); + let name = MetricName::from_raw_parts(name, tag_pos); let value: F = F::from_f64(reader.get_value()); let mtype = reader.get_type().map_err(MetricError::Capnp)?; @@ -174,16 +182,33 @@ where } }; - let timestamp = if reader.has_timestamp() { Some(reader.get_timestamp().map_err(MetricError::Capnp)?.get_ts()) } else { None }; + let timestamp = if reader.has_timestamp() { + Some(reader.get_timestamp().map_err(MetricError::Capnp)?.get_ts()) + } else { + None + }; let (sampling, up_counter) = match reader.get_meta() { - Ok(reader) => (if reader.has_sampling() { reader.get_sampling().ok().map(|reader| reader.get_sampling()) } else { None }, Some(reader.get_update_counter())), + Ok(reader) => ( + if reader.has_sampling() { + reader.get_sampling().ok().map(|reader| reader.get_sampling()) + } else { + None + }, + Some(reader.get_update_counter()), + ), Err(_) => (None, None), }; // we should NOT use Metric::new here because it is not a newly created metric // we'd get duplicate value in timer/set metrics if we used new - let metric: Metric = Metric { value, mtype, timestamp, sampling, update_counter: if let Some(c) = up_counter { c } else { 1 } }; + let metric: Metric = Metric { + value, + mtype, + timestamp, + sampling, + update_counter: if let Some(c) = up_counter { c } else { 1 }, + }; Ok((name, metric)) } diff --git a/src/name.rs b/src/name.rs index ee82c5c..cdcea2a 100644 --- a/src/name.rs +++ b/src/name.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use std::fmt::Debug; use std::hash::{Hash, Hasher}; -use bytes::{BufMut, BytesMut}; +use bytes::{BufMut, Bytes, BytesMut}; use num_traits::{AsPrimitive, Float}; use serde_derive::{Deserialize, Serialize}; @@ -10,11 +10,8 @@ use crate::aggregate::Aggregate; use crate::metric::FromF64; // TODO: Think error type. There is single possible error atm, so sort_tags returns () instead -// TODO: Think if we need sorted tags in btreemap instead of string +// TODO: Think if we need sorted tags in btreemap instead of string (at the moment of writing this we don't, because of allocation) // TODO: Handle repeating same tags i.e. gorets;a=b;e=b;a=b:... -// TODO: Split MetricName type to two: RawMetricName and MetricName, where the former is readonly -// and guarantees the tag position was already searched for, so we can remove those "expects tag position is -// found" everywhere #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] pub enum TagFormat { @@ -34,31 +31,119 @@ pub enum AggregationDestination { Both, } +pub fn find_tag_pos(name: &[u8], mode: TagFormat) -> Option { + match mode { + TagFormat::Graphite => name.iter().position(|c| *c == b';'), + } +} + +/// Sorts tags inside name using intermediate buffer +pub(crate) fn sort_tags(name: &mut [u8], mode: TagFormat, intermediate: &mut [u8], tag_pos: usize) -> Result { + if intermediate.len() < (name.len() - tag_pos) { + return Err(()); + } + + use lazysort::Sorted; + match mode { + TagFormat::Graphite => { + // There is one special case: the metric without tags, but received as tagged + // (i.e. with trailing semicolon) + // we want to save this semicolon for metric to stay tagged and tag position to remain + // correct + if tag_pos == name.len() - 1 { + return Ok(name.len()); + } + + let mut offset = 0; // meaningful length of data in intermediate buffer + let mut cutlen = 0; // length to cut from name because of removed empty tags + for part in name.split(|c| *c == b';').skip(1).sorted() { + if part.len() == 0 { + // offset += 1; + cutlen += 1; + } else { + let end = offset + part.len(); + intermediate[offset..end].copy_from_slice(part); + if end < intermediate.len() - 1 { + intermediate[end] = b';'; + } + offset = end + 1; + } + } + + let newlen = if intermediate[offset] == b';' { + offset -= 1; + name.len() - cutlen - 1 + } else { + name.len() - cutlen + }; + + if offset > 0 { + offset -= 1; + name[tag_pos + 1..newlen].copy_from_slice(&intermediate[..offset]); + } + + return Ok(newlen); + } + }; +} + /// Contains buffer containing the full metric name including tags -/// and some data to split tags from name useful for appending aggregates -/// +/// and some data to work with tags #[derive(Debug, Eq, Clone)] pub struct MetricName { - pub name: BytesMut, - pub tag_pos: Option, + pub name: Bytes, + pub(crate) tag_pos: Option, + //pub(crate) tag_format: TagFormat, // TODO we mayn need this in future //pub tags: BTreeMap, // we need btreemap to have tags sorted } impl MetricName { - pub fn new(name: BytesMut, tag_pos: Option) -> Self { - Self { name, tag_pos /*tags: BTreeMap::new()*/ } + pub fn new>(mut name: BytesMut, mode: TagFormat, intermediate: &mut B) -> Result { + let tag_pos = find_tag_pos(&name[..], mode); + // sort tags in place using intermediate buffer, buffer length MUST be at least + // `name.len() - tag_pos` bytes + // sorting is made lexicographically + match tag_pos { + // tag position was not found, so no tags + // but it is ok since we have nothing to sort + None => return Ok(Self { name: name.freeze(), tag_pos }), + Some(pos) => { + let intermediate: &mut [u8] = intermediate.as_mut(); + let newlen = sort_tags(&mut name[..], mode, intermediate, pos)?; + name.truncate(newlen); + } + }; + + Ok(Self { + name: name.freeze(), + tag_pos, /*tag_format*/ + }) } - // TODO example - /// find position where tags start, optionally forcing re-search when it's already found - /// Note, that found position points to the first semicolon, not the tags part itself - pub fn find_tag_pos(&mut self, force: bool) -> bool { - if force || self.tag_pos.is_none() { - self.tag_pos = self.name.iter().position(|c| *c == b';'); + /// Convenience method to create metric that is for sure has no tags in any format + pub fn new_untagged(name: BytesMut) -> Self { + Self { + name: name.freeze(), + tag_pos: None, } - self.tag_pos.is_some() } + /// Assemble name from internal parts *without checks*. Tags must be sorted, tag position must + /// be found according to mode + pub fn from_raw_parts(name: Bytes, tag_pos: Option) -> Self { + Self { name, tag_pos } + } + + // TODO example + // find position where tags start, optionally forcing re-search when it's already found + // Note, that found position points to the first semicolon, not the tags part itself + // fn find_tag_pos(&mut self) -> bool { + //if force || self.tag_pos.is_none() { + //self.tag_pos = self.name.iter().position(|c| *c == b';'); + //} + //self.tag_pos.is_some() + // } + /// true if metric is tagged pub fn has_tags(&mut self) -> bool { self.tag_pos.is_some() @@ -99,7 +184,7 @@ impl MetricName { } /// put name into buffer with suffix added with dot after name - fn put_with_suffix(&self, buf: &mut BytesMut, suffix: &[u8]) { + fn put_with_suffix(&self, buf: &mut BytesMut, suffix: &[u8], with_tags: bool) { let suflen = suffix.len(); let namelen = self.name.len(); @@ -107,49 +192,132 @@ impl MetricName { match self.tag_pos { None => { buf.put_slice(&self.name); - buf.put(b'.'); - buf.put_slice(suffix); + if suflen > 0 { + buf.put(b'.'); + buf.put_slice(suffix); + } } Some(pos) => { buf.put_slice(&self.name[..pos]); - buf.put(b'.'); - buf.put_slice(suffix); - buf.put_slice(&self.name[pos..]); + if suflen > 0 { + buf.put(b'.'); + buf.put_slice(suffix); + } + if with_tags { + buf.put_slice(&self.name[pos..]); + } } } } fn put_with_fixed_tag(&self, buf: &mut BytesMut, tag_name: &[u8], tag: &[u8], only_tag: bool) { - let namelen = self.name.len(); - let semicolon = self.name[namelen - 1] == b';'; + // we must put tag in sorted order + // only_tag = true means name part has been placed WITHOUT trailing semicolon + + // always add *at least* this amount of bytes let mut addlen = tag_name.len() + tag.len() + 1; // 1 is for `=` + let namelen = self.name.len(); + if !only_tag { addlen += namelen } - if !semicolon { - addlen += 1 // add one more for `;` + + buf.reserve(addlen + 1); // 1 is for `;` + match self.tag_pos { + None => { + if !only_tag { + buf.put_slice(&self.name); + } + // easy case: no tags + if self.name[namelen - 1] != b';' { + buf.put(b';'); + } + buf.put_slice(tag_name); + buf.put(b'='); + buf.put_slice(tag); + } + Some(pos) => { + // put the name itself anyways + if !only_tag { + buf.extend_from_slice(&self.name[..pos]); // add name without the semicolon + } + + // knowing tags are already sorted + // find position to place tag + let mut offset = pos + 1; // no point to compare first semicolon + + for part in self.name[pos + 1..].split(|c| *c == b';') { + if part < tag_name { + offset += part.len() + 1; // always shift considering semicolon at the end + } else { + break; + } + } + + // at this point offset is the position of name split + // name is always in buffer, without trailing semicolon + if offset >= namelen { + // new tag is put after all tags + + // put the whole name with all old tags including first semicolon + buf.extend_from_slice(&self.name[pos..]); + + // prepend new tag with semicolon if required + if self.name[namelen - 1] != b';' { + buf.put(b';'); + } + + // put new tag + buf.put_slice(tag_name); + buf.put(b'='); + buf.put_slice(tag); + } else if offset == pos + 1 { + // new tag is put before all tags + + // put the new tag with semicolon + buf.put(b';'); + buf.put_slice(tag_name); + buf.put(b'='); + buf.put_slice(tag); + + // put other tags with leading semicolon + buf.extend_from_slice(&self.name[pos..]); + } else { + dbg!("MID", String::from_utf8_lossy(&buf[..])); + dbg!("MID1??", &self.name); + dbg!("MID1", String::from_utf8_lossy(&self.name[..])); + dbg!("MID2", String::from_utf8_lossy(&self.name[pos..offset])); + // put tags before offset including first semicolon + buf.extend_from_slice(&self.name[pos..offset]); + + // put the new tag + buf.put_slice(tag_name); + buf.put(b'='); + buf.put_slice(tag); + + if self.name[offset] != b';' { + buf.put(b';'); + } + + buf.extend_from_slice(&self.name[offset..]); + } + // + } }; - buf.reserve(addlen); - if !only_tag { - buf.put_slice(&self.name); - } - if !semicolon { - buf.put(b';'); - } - buf.put_slice(tag_name); - buf.put(b'='); - buf.put_slice(tag); } /// puts a name with an aggregate to provided buffer depending on dest - /// expects tag_pos to be already found + /// to avoid putting different aggregates into same names + /// requires all replacements to exist, giving error otherwise + /// does no checks on overriding though pub fn put_with_aggregate( // rustfmt &self, buf: &mut BytesMut, - dest: AggregationDestination, + dest: &AggregationDestination, agg: &Aggregate, postfix_replacements: &HashMap, String>, + prefix_replacements: &HashMap, String>, tag_replacements: &HashMap, String>, ) -> Result<(), ()> where @@ -163,6 +331,14 @@ impl MetricName { return Ok(()); } + // find and put prefix first + let prefix = prefix_replacements.get(agg).ok_or(())?; + if prefix.len() > 0 { + buf.reserve(prefix.len() + 1); + buf.put(prefix); + buf.put(b'.'); + } + // we should not use let agg_postfix before the match, because with the tag case we don't // need it // the same applies to tag name and value: we only need them when aggregating to tags @@ -171,7 +347,7 @@ impl MetricName { AggregationDestination::Smart if self.tag_pos.is_none() => { let agg_postfix = postfix_replacements.get(agg).ok_or(())?.as_bytes(); // metric is untagged, add aggregate to name - Ok(self.put_with_suffix(buf, agg_postfix)) + Ok(self.put_with_suffix(buf, agg_postfix, true)) } AggregationDestination::Smart => { let agg_tag_name = tag_replacements.get(&Aggregate::AggregateTag).ok_or(())?.as_bytes(); @@ -182,7 +358,7 @@ impl MetricName { } AggregationDestination::Name => { let agg_postfix = postfix_replacements.get(agg).ok_or(())?.as_bytes(); - Ok(self.put_with_suffix(buf, agg_postfix)) + Ok(self.put_with_suffix(buf, agg_postfix, true)) } AggregationDestination::Tag => { let agg_tag_name = tag_replacements.get(&Aggregate::AggregateTag).ok_or(())?.as_bytes(); @@ -195,60 +371,12 @@ impl MetricName { let agg_tag_name = tag_replacements.get(&Aggregate::AggregateTag).ok_or(())?.as_bytes(); let agg_tag_value = tag_replacements.get(agg).ok_or(())?.as_bytes(); - self.put_with_suffix(buf, agg_postfix); + self.put_with_suffix(buf, agg_postfix, false); self.put_with_fixed_tag(buf, agg_tag_name, agg_tag_value, true); Ok(()) } } } - - /// sort tags in place using intermediate buffer, buffer length MUST be at least - /// `name.len() - tag_pos` bytes - /// sorting is made lexicographically - pub fn sort_tags>(&mut self, mode: TagFormat, intermediate: &mut B) -> Result<(), ()> { - if self.tag_pos.is_none() && !self.find_tag_pos(true) { - // tag position was not found, so no tags - // but it is ok since we have nothing to sort - return Ok(()); - } - - let intermediate: &mut [u8] = intermediate.as_mut(); - if intermediate.len() < (self.name.len() - self.tag_pos.unwrap()) { - return Err(()); - } - - use lazysort::Sorted; - match mode { - TagFormat::Graphite => { - let mut offset = 0; - for part in self.name.split(|c| *c == b';').skip(1).sorted() { - let end = offset + part.len(); - intermediate[offset..end].copy_from_slice(part); - if end < intermediate.len() - 1 { - intermediate[end] = b';'; - } - offset = end + 1; - } - offset -= 1; - let tp = self.tag_pos.unwrap() + 1; - - self.name[tp..].copy_from_slice(&intermediate[..offset]); - } - } - Ok(()) - } - - // allocate tags structure and shorten name fetching tags into BTreeMap - //pub fn fetch_tags(&mut self, mode: TagFormat) { - //let tag_pos = match self.tag_pos { - //None => return, - //Some(t) => t, - //}; - - //match mode { - //TagFormat::Graphite => unimplemented!(), - //} - //} } impl PartialEq for MetricName { @@ -268,58 +396,125 @@ impl Hash for MetricName { mod tests { use super::*; - use bytes::BufMut; + fn new_name_graphite(n: &[u8]) -> MetricName { + let mut buf = Vec::with_capacity(9000); + buf.resize(9000, 0u8); + MetricName::new(BytesMut::from(n), TagFormat::Graphite, &mut buf).unwrap() + } + + fn assert_buf(buf: &mut BytesMut, match_: &[u8], error: &'static str) { + let res = &buf.take()[..]; + assert_eq!( + res, + match_, + "\n{}:\n{}\n{}", + error, + String::from_utf8_lossy(match_), + String::from_utf8_lossy(res) + ); + } #[test] fn metric_name_tag_position() { - let mut name = MetricName::new(BytesMut::from(&b"gorets.bobez;a=b;c=d"[..]), Some(1)); - name.find_tag_pos(true); - assert_eq!(name.tag_pos, Some(12)); + let name = Bytes::from(&b"gorets.bobez;a=b;c=d"[..]); + assert_eq!(find_tag_pos(&name[..], TagFormat::Graphite), Some(12)); } #[test] fn metric_name_tags_len() { - let mut name = MetricName::new(BytesMut::from(&b"gorets.bobez;a=b;c=d"[..]), None); - name.find_tag_pos(true); + let name = new_name_graphite(b"gorets.bobez;a=b;c=d"); assert_eq!(name.tags_len(), 8); - let mut name = MetricName::new(BytesMut::from(&b"gorets.bobezzz"[..]), None); - name.find_tag_pos(true); + let name = new_name_graphite(&b"gorets.bobezzz"[..]); assert_eq!(name.tags_len(), 0); } #[test] fn metric_name_splits() { - let mut name = MetricName::new(BytesMut::from(&b"gorets.bobez"[..]), None); - name.find_tag_pos(true); + let name = new_name_graphite(&b"gorets.bobez"[..]); assert_eq!(name.name_without_tags(), &b"gorets.bobez"[..]); assert_eq!(name.tags_without_name().len(), 0); - let mut name = MetricName::new(BytesMut::from(&b"gorets.bobez;a=b;c=d"[..]), None); - name.find_tag_pos(true); + let name = new_name_graphite(&b"gorets.bobez;a=b;c=d"[..]); assert_eq!(name.name_without_tags(), &b"gorets.bobez"[..]); assert_eq!(name.tags_without_name(), &b";a=b;c=d"[..]); } + #[test] + fn metric_name_put_tag() { + let with_tags = new_name_graphite(&b"gorets.bobez;aa=v;aa=z;bb=z;dd=h;"[..]); + + // create some replacements + let mut po_reps: HashMap, String> = HashMap::new(); + po_reps.insert(Aggregate::Count, "count".to_string()); + + let mut pr_reps: HashMap, String> = HashMap::new(); + pr_reps.insert(Aggregate::Count, String::new()); + + let mut t_reps: HashMap, String> = HashMap::new(); + t_reps.insert(Aggregate::Count, "count".to_string()); + + let mut buf = BytesMut::new(); + + t_reps.insert(Aggregate::AggregateTag, "aa".to_string()); + + with_tags + .put_with_aggregate(&mut buf, &AggregationDestination::Tag, &Aggregate::Count, &po_reps, &pr_reps, &t_reps) + .unwrap(); + + assert_buf( + &mut buf, + &b"gorets.bobez;aa=count;aa=v;aa=z;bb=z;dd=h"[..], + "aggregate properly added in front of tags", + ); + + dbg!("WTF??", &with_tags.name); + t_reps.insert(Aggregate::AggregateTag, "cc".to_string()); + with_tags + .put_with_aggregate(&mut buf, &AggregationDestination::Tag, &Aggregate::Count, &po_reps, &pr_reps, &t_reps) + .unwrap(); + + assert_buf( + &mut buf, + &b"gorets.bobez;aa=v;aa=z;bb=z;cc=count;dd=h"[..], + "aggregate properly added in middle of tags", + ); + + t_reps.insert(Aggregate::AggregateTag, "ff".to_string()); + with_tags + .put_with_aggregate(&mut buf, &AggregationDestination::Tag, &Aggregate::Count, &po_reps, &pr_reps, &t_reps) + .unwrap(); + + assert_buf( + &mut buf, + &b"gorets.bobez;aa=v;aa=z;bb=z;dd=h;ff=count"[..], + "aggregate properly added in end of tags", + ); + } + #[test] fn metric_aggregate_modes() { // create some replacements - let mut p_reps: HashMap, String> = HashMap::new(); - p_reps.insert(Aggregate::Count, "count".to_string()); - p_reps.insert(Aggregate::AggregateTag, "MUST NOT BE USED".to_string()); - p_reps.insert(Aggregate::Percentile(0.8f64), "percentile80".to_string()); + let mut po_reps: HashMap, String> = HashMap::new(); + po_reps.insert(Aggregate::Count, "count".to_string()); + po_reps.insert(Aggregate::AggregateTag, "MUST NOT BE USED".to_string()); + po_reps.insert(Aggregate::Percentile(0.8f64), "percentile80".to_string()); + po_reps.insert(Aggregate::UpdateCount, "".to_string()); + + let mut pr_reps: HashMap, String> = HashMap::new(); + pr_reps.insert(Aggregate::Count, "counts".to_string()); + pr_reps.insert(Aggregate::UpdateCount, "updates".to_string()); + pr_reps.insert(Aggregate::Percentile(0.8f64), "".to_string()); let mut t_reps: HashMap, String> = HashMap::new(); t_reps.insert(Aggregate::Count, "cnt".to_string()); + t_reps.insert(Aggregate::UpdateCount, "updates".to_string()); t_reps.insert(Aggregate::AggregateTag, "agg".to_string()); // intentionally skip adding percentile80 - let mut without_tags = MetricName::new(BytesMut::from(&b"gorets.bobez"[..]), None); - let mut with_tags = MetricName::new(BytesMut::from(&b"gorets.bobez;tag=value"[..]), None); - let mut with_semicolon = MetricName::new(BytesMut::from(&b"gorets.bobez;"[..]), None); - without_tags.find_tag_pos(true); - with_tags.find_tag_pos(true); - with_semicolon.find_tag_pos(true); + let without_tags = new_name_graphite(&b"gorets.bobez"[..]); + let with_tags = new_name_graphite(&b"gorets.bobez;tag=value"[..]); + let with_semicolon = new_name_graphite(&b"gorets.bobez;"[..]); // create 0-size buffer to make sure allocation counts work as intended let mut buf = BytesMut::new(); @@ -327,95 +522,217 @@ mod tests { // --------- without_tags // max is not in replacements - assert!(without_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, &Aggregate::Max, &p_reps, &t_reps).is_err(), "non existing replacement gave no error"); - - without_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, &Aggregate::Value, &p_reps, &t_reps).unwrap(); + assert!( + without_tags + .put_with_aggregate(&mut buf, &AggregationDestination::Smart, &Aggregate::Max, &po_reps, &pr_reps, &t_reps) + .is_err(), + "non existing replacement gave no error" + ); + + // value is aggregated withtout replacements + without_tags + .put_with_aggregate(&mut buf, &AggregationDestination::Smart, &Aggregate::Value, &po_reps, &pr_reps, &t_reps) + .unwrap(); assert_eq!(&buf.take()[..], &b"gorets.bobez"[..]); - without_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, &Aggregate::Count, &p_reps, &t_reps).unwrap(); - assert_eq!(&buf.take()[..], &b"gorets.bobez.count"[..]); - - without_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, &Aggregate::Percentile(0.8f64), &p_reps, &t_reps).unwrap(); + // update count is aggregated only with prefix + without_tags + .put_with_aggregate(&mut buf, &AggregationDestination::Smart, &Aggregate::UpdateCount, &po_reps, &pr_reps, &t_reps) + .unwrap(); + + assert_eq!(&buf.take()[..], &b"updates.gorets.bobez"[..]); + + with_tags + .put_with_aggregate(&mut buf, &AggregationDestination::Smart, &Aggregate::UpdateCount, &po_reps, &pr_reps, &t_reps) + .unwrap(); + + assert_buf( + &mut buf, + &b"updates.gorets.bobez;agg=updates;tag=value"[..], + "add aggregate to tagged metric in smart mode", + ); + + // different aggregation modes work as intended + without_tags + .put_with_aggregate(&mut buf, &AggregationDestination::Smart, &Aggregate::Count, &po_reps, &pr_reps, &t_reps) + .unwrap(); + assert_eq!(&buf.take()[..], &b"counts.gorets.bobez.count"[..]); + + without_tags + .put_with_aggregate( + &mut buf, + &AggregationDestination::Smart, + &Aggregate::Percentile(0.8f64), + &po_reps, + &pr_reps, + &t_reps, + ) + .unwrap(); assert_eq!(&buf.take()[..], &b"gorets.bobez.percentile80"[..], "existing postfix replacement was not put"); - without_tags.put_with_aggregate(&mut buf, AggregationDestination::Name, &Aggregate::Count, &p_reps, &t_reps).unwrap(); - assert_eq!(&buf.take()[..], &b"gorets.bobez.count"[..]); - - without_tags.put_with_aggregate(&mut buf, AggregationDestination::Name, &Aggregate::Percentile(0.8f64), &p_reps, &t_reps).unwrap(); + without_tags + .put_with_aggregate(&mut buf, &AggregationDestination::Name, &Aggregate::Count, &po_reps, &pr_reps, &t_reps) + .unwrap(); + assert_eq!(&buf.take()[..], &b"counts.gorets.bobez.count"[..]); + + without_tags + .put_with_aggregate( + &mut buf, + &AggregationDestination::Name, + &Aggregate::Percentile(0.8f64), + &po_reps, + &pr_reps, + &t_reps, + ) + .unwrap(); assert_eq!(&buf.take()[..], &b"gorets.bobez.percentile80"[..], "existing postfix replacement was not put"); - without_tags.put_with_aggregate(&mut buf, AggregationDestination::Tag, &Aggregate::Count, &p_reps, &t_reps).unwrap(); - assert_eq!(&buf.take()[..], &b"gorets.bobez;agg=cnt"[..]); - - let err = without_tags.put_with_aggregate(&mut buf, AggregationDestination::Tag, &Aggregate::Percentile(0.8f64), &p_reps, &t_reps); + without_tags + .put_with_aggregate(&mut buf, &AggregationDestination::Tag, &Aggregate::Count, &po_reps, &pr_reps, &t_reps) + .unwrap(); + assert_eq!(&buf.take()[..], &b"counts.gorets.bobez;agg=cnt"[..]); + + let err = without_tags.put_with_aggregate( + &mut buf, + &AggregationDestination::Tag, + &Aggregate::Percentile(0.8f64), + &po_reps, + &pr_reps, + &t_reps, + ); assert_eq!(err, Err(()), "p80 aggregated into tags whilt it should not"); - without_tags.put_with_aggregate(&mut buf, AggregationDestination::Both, &Aggregate::Count, &p_reps, &t_reps).unwrap(); - assert_eq!(&buf.take()[..], &b"gorets.bobez.count;agg=cnt"[..]); - - let err = without_tags.put_with_aggregate(&mut buf, AggregationDestination::Both, &Aggregate::Percentile(0.8f64), &p_reps, &t_reps); + without_tags + .put_with_aggregate(&mut buf, &AggregationDestination::Both, &Aggregate::Count, &po_reps, &pr_reps, &t_reps) + .unwrap(); + assert_eq!(&buf.take()[..], &b"counts.gorets.bobez.count;agg=cnt"[..]); + + let err = without_tags.put_with_aggregate( + &mut buf, + &AggregationDestination::Both, + &Aggregate::Percentile(0.8f64), + &po_reps, + &pr_reps, + &t_reps, + ); assert_eq!(err, Err(()), "p80 aggregated into tags whilt it should not"); // --------- with_tags - assert!(with_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, &Aggregate::Max, &p_reps, &t_reps).is_err()); + assert!(with_tags + .put_with_aggregate(&mut buf, &AggregationDestination::Smart, &Aggregate::Max, &po_reps, &pr_reps, &t_reps) + .is_err()); - with_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, &Aggregate::Value, &p_reps, &t_reps).unwrap(); + with_tags + .put_with_aggregate(&mut buf, &AggregationDestination::Smart, &Aggregate::Value, &po_reps, &pr_reps, &t_reps) + .unwrap(); assert_eq!(&buf.take()[..], &b"gorets.bobez;tag=value"[..]); - with_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, &Aggregate::Count, &p_reps, &t_reps).unwrap(); - assert_eq!(&buf.take()[..], &b"gorets.bobez;tag=value;agg=cnt"[..]); - - let err = with_tags.put_with_aggregate(&mut buf, AggregationDestination::Smart, &Aggregate::Percentile(0.8f64), &p_reps, &t_reps); + with_tags + .put_with_aggregate(&mut buf, &AggregationDestination::Smart, &Aggregate::Count, &po_reps, &pr_reps, &t_reps) + .unwrap(); + assert_eq!(&buf.take()[..], &b"counts.gorets.bobez;agg=cnt;tag=value"[..]); + + let err = with_tags.put_with_aggregate( + &mut buf, + &AggregationDestination::Smart, + &Aggregate::Percentile(0.8f64), + &po_reps, + &pr_reps, + &t_reps, + ); assert_eq!(err, Err(()), "p80 aggregated into tags whilt it should not"); - with_tags.put_with_aggregate(&mut buf, AggregationDestination::Name, &Aggregate::Count, &p_reps, &t_reps).unwrap(); - assert_eq!(&buf.take()[..], &b"gorets.bobez.count;tag=value"[..]); - - with_tags.put_with_aggregate(&mut buf, AggregationDestination::Tag, &Aggregate::Count, &p_reps, &t_reps).unwrap(); - assert_eq!(&buf.take()[..], &b"gorets.bobez;tag=value;agg=cnt"[..]); - - let err = with_tags.put_with_aggregate(&mut buf, AggregationDestination::Tag, &Aggregate::Percentile(0.8f64), &p_reps, &t_reps); + with_tags + .put_with_aggregate(&mut buf, &AggregationDestination::Name, &Aggregate::Count, &po_reps, &pr_reps, &t_reps) + .unwrap(); + assert_buf(&mut buf, &b"counts.gorets.bobez.count;tag=value"[..], "put tagged metric in name mode"); + + with_tags + .put_with_aggregate(&mut buf, &AggregationDestination::Tag, &Aggregate::Count, &po_reps, &pr_reps, &t_reps) + .unwrap(); + assert_buf( + &mut buf, + &b"counts.gorets.bobez;agg=cnt;tag=value"[..], + "add aggregate to tagged metric in tag mode", + ); + + let err = with_tags.put_with_aggregate( + &mut buf, + &AggregationDestination::Tag, + &Aggregate::Percentile(0.8f64), + &po_reps, + &pr_reps, + &t_reps, + ); assert_eq!(err, Err(()), "p80 aggregated into tags whilt it should not"); - with_tags.put_with_aggregate(&mut buf, AggregationDestination::Both, &Aggregate::Count, &p_reps, &t_reps).unwrap(); - assert_eq!(&buf.take()[..], &b"gorets.bobez.count;tag=value;agg=cnt"[..]); - - let err = with_tags.put_with_aggregate(&mut buf, AggregationDestination::Both, &Aggregate::Percentile(0.8f64), &p_reps, &t_reps); + with_tags + .put_with_aggregate(&mut buf, &AggregationDestination::Both, &Aggregate::Count, &po_reps, &pr_reps, &t_reps) + .unwrap(); + assert_buf( + &mut buf, + &b"counts.gorets.bobez.count;agg=cnt;tag=value"[..], + "add aggregate to tagged metric in both mode", + ); + + let err = with_tags.put_with_aggregate( + &mut buf, + &AggregationDestination::Both, + &Aggregate::Percentile(0.8f64), + &po_reps, + &pr_reps, + &t_reps, + ); assert_eq!(err, Err(()), "p80 aggregated into tags whilt it should not"); // ensure trailing semicolon is not duplicated - with_semicolon.put_with_aggregate(&mut buf, AggregationDestination::Smart, &Aggregate::Value, &p_reps, &t_reps).unwrap(); + with_semicolon + .put_with_aggregate(&mut buf, &AggregationDestination::Smart, &Aggregate::Value, &po_reps, &pr_reps, &t_reps) + .unwrap(); assert_eq!(&buf.take()[..], &b"gorets.bobez;"[..]); - with_semicolon.put_with_aggregate(&mut buf, AggregationDestination::Smart, &Aggregate::Count, &p_reps, &t_reps).unwrap(); - assert_eq!(&buf.take()[..], &b"gorets.bobez;agg=cnt"[..]); + with_semicolon + .put_with_aggregate(&mut buf, &AggregationDestination::Smart, &Aggregate::Count, &po_reps, &pr_reps, &t_reps) + .unwrap(); + assert_eq!(&buf.take()[..], &b"counts.gorets.bobez;agg=cnt"[..]); } #[test] fn metric_name_sort_tags_graphite() { + let mut name = BytesMut::from(&b"gorets2;tag3=shit;t2=fuck"[..]); + let mode = TagFormat::Graphite; + let mut intermediate: Vec = Vec::new(); - let mut name = MetricName::new(BytesMut::from(&b"gorets2;tag3=shit;t2=fuck"[..]), None); - name.find_tag_pos(false); - if intermediate.len() < name.name.len() { - intermediate.resize(name.name.len(), 0u8); - } - assert!(name.sort_tags(TagFormat::Graphite, &mut intermediate).is_ok()); + intermediate.resize(name.len(), 0u8); - let mut name = MetricName::new(BytesMut::from(&b"gorets.bobez;t=y;a=b;c=e;u=v;c=d;c=b;aaa=z"[..]), None); - name.find_tag_pos(false); - let tag_pos = name.tag_pos.unwrap(); + let tag_pos = find_tag_pos(&name, mode).unwrap(); - let tag_len = name.name.len() - tag_pos; - assert_eq!(tag_len, 30); + assert!(sort_tags(&mut name[..], mode, &mut intermediate, tag_pos).is_ok()); - //let mut intermediate = Vec::with_capacity(tag_len); + let mut name = BytesMut::from(&b"gorets.bobez;t=y;a=b;;;c=e;u=v;c=d;c=b;aaa=z"[..]); + let tag_pos = find_tag_pos(&name, mode).unwrap(); + let tag_len = name.len() - tag_pos; + assert_eq!(tag_len, 32); + assert_eq!(&name[..tag_pos], &b"gorets.bobez"[..]); + assert_eq!(&name[tag_pos..], &b";t=y;a=b;;;c=e;u=v;c=d;c=b;aaa=z"[..]); let mut intermediate = BytesMut::new(); intermediate.resize(tag_len - 1, 0u8); // intentionally resize to less bytes than required - assert!(name.sort_tags(TagFormat::Graphite, &mut intermediate).is_err()); - - intermediate.put(0u8); // resize to good length now - assert!(name.sort_tags(TagFormat::Graphite, &mut intermediate).is_ok()); - assert_eq!(&name.name[..], &b"gorets.bobez;a=b;aaa=z;c=b;c=d;c=e;t=y;u=v"[..]); + assert!(sort_tags(&mut name[..], mode, &mut intermediate, tag_pos).is_err()); + + intermediate.extend_from_slice(&[0u8]); // resize to good length now + let newlen = sort_tags(&mut name[..], mode, &mut intermediate, tag_pos).unwrap(); + assert_eq!(newlen, name.len() - 2); // two semicolons should be removed + assert_eq!( + &name[..newlen], + &b"gorets.bobez;a=b;aaa=z;c=b;c=d;c=e;t=y;u=v"[..], + "{} {}", + String::from_utf8_lossy(&name[..]), + String::from_utf8_lossy(&name[..newlen]) + ); + + let mut name = BytesMut::from(&b"gorets.bobez;bb=z;aa=z;aa=v;dd=h;"[..]); + let newlen = sort_tags(&mut name[..], mode, &mut intermediate, tag_pos).unwrap(); + assert_eq!(&name[..newlen], &b"gorets.bobez;aa=v;aa=z;bb=z;dd=h"[..]); } } diff --git a/src/parser.rs b/src/parser.rs index 6dfa8e2..7cb36cf 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -16,7 +16,7 @@ use combine::{optional, skip_many1, Parser}; use bytes::BytesMut; use crate::metric::{FromF64, Metric, MetricType}; -use crate::name::MetricName; +use crate::name::{sort_tags, MetricName, TagFormat}; use num_traits::{AsPrimitive, Float}; /// Used for returning parsing result @@ -35,7 +35,10 @@ where /// (TODO: code example) /// /// It's current goal is to be fast, use less allocs and to not depend on error and even probably input typing -pub fn metric_stream_parser<'a, I, F>(max_unparsed: usize, max_tags_len: usize) -> impl Parser, PartialState = impl Default + 'a> +pub fn metric_stream_parser<'a, I, F>( + max_unparsed: usize, + max_tags_len: usize, +) -> impl Parser, PartialState = impl Default + 'a> where I: RangeStream + std::fmt::Debug, I::Error: ParseError, @@ -100,7 +103,9 @@ where let sampling = (bytes(b"|@"), recognize(unsigned_float)).and_then(|(_, val)| { // TODO replace from_utf8 with handmade parser removing recognize - from_utf8(val).map_err(StreamErrorFor::::other).map(|v| v.parse::().map_err(StreamErrorFor::::other))? + from_utf8(val) + .map_err(StreamErrorFor::::other) + .map(|v| v.parse::().map_err(StreamErrorFor::::other))? }); let metric = ( @@ -157,6 +162,7 @@ pub struct MetricParser<'a, F, E: ParseErrorHandler> { max_unparsed: usize, max_tags_len: usize, handler: E, + sort_buf: Vec, _pd: PhantomData, } @@ -165,7 +171,17 @@ where E: ParseErrorHandler, { pub fn new(input: &'a mut BytesMut, max_unparsed: usize, max_tags_len: usize, handler: E) -> Self { - Self { input, skip: 0, max_unparsed, max_tags_len, handler, _pd: PhantomData } + let mut sort_buf = Vec::with_capacity(max_unparsed); + sort_buf.resize(max_unparsed, 0u8); + Self { + input, + skip: 0, + max_unparsed, + max_tags_len, + handler, + sort_buf, + _pd: PhantomData, + } } } @@ -235,13 +251,21 @@ where self.input.advance(start); // now we can cut the name itself - let name = self.input.split_to(stop - start); + let mut name = self.input.split_to(stop - start); self.input.advance(metriclen); self.skip = 0; - return Some((MetricName::new(name, tag_pos), metric)); + if let Some(pos) = tag_pos { + // with tag_pos found we need to try to sort tags + // + // since the buffer is created by ourselves, we are responsible for it's size, so + // it's WAY better to panic here if buffer size is incorrect + sort_tags(&mut name[..], TagFormat::Graphite, &mut self.sort_buf, pos).unwrap(); + } + + return Some((MetricName::from_raw_parts(name.freeze(), tag_pos), metric)); } Ok((Some(ParsedPart::Trash(pos)), consumed)) => { // trash matched @@ -473,10 +497,11 @@ mod tests { assert_eq!(name.tag_pos, None); assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(1)), None, None).unwrap()); + // parser must sort tags let (name, metric) = parser.next().unwrap(); - assert_eq!(&name.name[..], &b"gorets2;tag3=shit;t2=fuck"[..]); + assert_eq!(&name.name[..], &b"gorets2;t2=fuck;tag3=shit"[..]); assert_eq!(name.tag_pos, Some(7usize)); - assert_eq!(&name.name[name.tag_pos.unwrap()..], &b";tag3=shit;t2=fuck"[..]); + assert_eq!(&name.name[name.tag_pos.unwrap()..], &b";t2=fuck;tag3=shit"[..]); assert_eq!(metric, Metric::::new(1000f64, MetricType::Gauge(Some(-1)), None, Some(0.5)).unwrap()); } @@ -485,7 +510,28 @@ mod tests { let mut data = BytesMut::new(); data.extend_from_slice(b"gorets1:+1001|g\nT\x01RAi:|\x01SH\nnuggets2:-1002|s|@0.5\nMORETrasH\nFUUU\n\ngorets3:+1003|ggorets4:+1004|ms:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::ggorets5:1005|ms"); - let correct = vec![(Bytes::from("gorets1"), Metric::::new(1001f64, MetricType::Gauge(Some(1)), None, None).unwrap()), (Bytes::from("nuggets2"), Metric::::new(-1002f64, MetricType::Set(HashSet::new()), None, Some(0.5)).unwrap()), (Bytes::from("gorets3"), Metric::::new(1003f64, MetricType::Gauge(Some(1)), None, None).unwrap()), (Bytes::from("gorets4"), Metric::::new(1004f64, MetricType::Timer(Vec::new()), None, None).unwrap()), (Bytes::from("gorets5"), Metric::::new(1005f64, MetricType::Timer(Vec::new()), None, None).unwrap())]; + let correct = vec![ + ( + Bytes::from("gorets1"), + Metric::::new(1001f64, MetricType::Gauge(Some(1)), None, None).unwrap(), + ), + ( + Bytes::from("nuggets2"), + Metric::::new(-1002f64, MetricType::Set(HashSet::new()), None, Some(0.5)).unwrap(), + ), + ( + Bytes::from("gorets3"), + Metric::::new(1003f64, MetricType::Gauge(Some(1)), None, None).unwrap(), + ), + ( + Bytes::from("gorets4"), + Metric::::new(1004f64, MetricType::Timer(Vec::new()), None, None).unwrap(), + ), + ( + Bytes::from("gorets5"), + Metric::::new(1005f64, MetricType::Timer(Vec::new()), None, None).unwrap(), + ), + ]; for i in 1..(data.len() + 1) { // this is out test case - partially received data let mut testinput = BytesMut::from(&data[0..i]); From cf27df189e457e884506a9abe9152537e8e6bb5c Mon Sep 17 00:00:00 2001 From: Sergey Noskov Date: Fri, 13 Dec 2019 08:42:13 +0300 Subject: [PATCH 19/23] Make clippy happy --- rustfmt.toml | 2 ++ src/name.rs | 11 ++++++----- src/parser.rs | 3 +-- 3 files changed, 9 insertions(+), 7 deletions(-) create mode 100644 rustfmt.toml diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..65241ae --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,2 @@ +max_width=160 +ideal_width=80 diff --git a/src/name.rs b/src/name.rs index cdcea2a..abca123 100644 --- a/src/name.rs +++ b/src/name.rs @@ -57,7 +57,7 @@ pub(crate) fn sort_tags(name: &mut [u8], mode: TagFormat, intermediate: &mut [u8 let mut offset = 0; // meaningful length of data in intermediate buffer let mut cutlen = 0; // length to cut from name because of removed empty tags for part in name.split(|c| *c == b';').skip(1).sorted() { - if part.len() == 0 { + if part.is_empty() { // offset += 1; cutlen += 1; } else { @@ -82,9 +82,9 @@ pub(crate) fn sort_tags(name: &mut [u8], mode: TagFormat, intermediate: &mut [u8 name[tag_pos + 1..newlen].copy_from_slice(&intermediate[..offset]); } - return Ok(newlen); + Ok(newlen) } - }; + } } /// Contains buffer containing the full metric name including tags @@ -310,11 +310,12 @@ impl MetricName { /// to avoid putting different aggregates into same names /// requires all replacements to exist, giving error otherwise /// does no checks on overriding though + #[allow(clippy::unit_arg)] pub fn put_with_aggregate( // rustfmt &self, buf: &mut BytesMut, - dest: &AggregationDestination, + dest: AggregationDestination, agg: &Aggregate, postfix_replacements: &HashMap, String>, prefix_replacements: &HashMap, String>, @@ -333,7 +334,7 @@ impl MetricName { // find and put prefix first let prefix = prefix_replacements.get(agg).ok_or(())?; - if prefix.len() > 0 { + if !prefix.is_empty() { buf.reserve(prefix.len() + 1); buf.put(prefix); buf.put(b'.'); diff --git a/src/parser.rs b/src/parser.rs index 7cb36cf..58bb8a6 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -171,8 +171,7 @@ where E: ParseErrorHandler, { pub fn new(input: &'a mut BytesMut, max_unparsed: usize, max_tags_len: usize, handler: E) -> Self { - let mut sort_buf = Vec::with_capacity(max_unparsed); - sort_buf.resize(max_unparsed, 0u8); + let sort_buf = vec![0; max_unparsed]; Self { input, skip: 0, From 4f4b60700e54777e8a0382c9c3f2eb7d80c7af12 Mon Sep 17 00:00:00 2001 From: Sergey Noskov Date: Mon, 16 Dec 2019 07:23:01 +0300 Subject: [PATCH 20/23] Make small values Copy --- src/aggregate.rs | 4 +-- src/name.rs | 88 +++++++++++++++++++++++------------------------- 2 files changed, 44 insertions(+), 48 deletions(-) diff --git a/src/aggregate.rs b/src/aggregate.rs index 8004112..e0a339e 100644 --- a/src/aggregate.rs +++ b/src/aggregate.rs @@ -49,11 +49,11 @@ where } } -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] #[serde(rename_all = "kebab-case", deny_unknown_fields, try_from = "String")] pub enum Aggregate where - F: Float + Debug + FromF64 + AsPrimitive, + F: Copy + Float + Debug + FromF64 + AsPrimitive, { #[serde(skip)] Value, diff --git a/src/name.rs b/src/name.rs index abca123..8408909 100644 --- a/src/name.rs +++ b/src/name.rs @@ -283,10 +283,6 @@ impl MetricName { // put other tags with leading semicolon buf.extend_from_slice(&self.name[pos..]); } else { - dbg!("MID", String::from_utf8_lossy(&buf[..])); - dbg!("MID1??", &self.name); - dbg!("MID1", String::from_utf8_lossy(&self.name[..])); - dbg!("MID2", String::from_utf8_lossy(&self.name[pos..offset])); // put tags before offset including first semicolon buf.extend_from_slice(&self.name[pos..offset]); @@ -316,7 +312,7 @@ impl MetricName { &self, buf: &mut BytesMut, dest: AggregationDestination, - agg: &Aggregate, + agg: Aggregate, postfix_replacements: &HashMap, String>, prefix_replacements: &HashMap, String>, tag_replacements: &HashMap, String>, @@ -325,7 +321,7 @@ impl MetricName { F: Float + Debug + FromF64 + AsPrimitive, { // for value aggregate ignore replacements and other shit - if agg == &Aggregate::Value { + if agg == Aggregate::Value { let namelen = self.name.len(); buf.reserve(namelen); buf.put_slice(&self.name); @@ -333,7 +329,7 @@ impl MetricName { } // find and put prefix first - let prefix = prefix_replacements.get(agg).ok_or(())?; + let prefix = prefix_replacements.get(&agg).ok_or(())?; if !prefix.is_empty() { buf.reserve(prefix.len() + 1); buf.put(prefix); @@ -346,31 +342,31 @@ impl MetricName { match dest { AggregationDestination::Smart if self.tag_pos.is_none() => { - let agg_postfix = postfix_replacements.get(agg).ok_or(())?.as_bytes(); + let agg_postfix = postfix_replacements.get(&agg).ok_or(())?.as_bytes(); // metric is untagged, add aggregate to name Ok(self.put_with_suffix(buf, agg_postfix, true)) } AggregationDestination::Smart => { let agg_tag_name = tag_replacements.get(&Aggregate::AggregateTag).ok_or(())?.as_bytes(); - let agg_tag_value = tag_replacements.get(agg).ok_or(())?.as_bytes(); + let agg_tag_value = tag_replacements.get(&agg).ok_or(())?.as_bytes(); // metric is tagged, add aggregate as tag Ok(self.put_with_fixed_tag(buf, agg_tag_name, agg_tag_value, false)) } AggregationDestination::Name => { - let agg_postfix = postfix_replacements.get(agg).ok_or(())?.as_bytes(); + let agg_postfix = postfix_replacements.get(&agg).ok_or(())?.as_bytes(); Ok(self.put_with_suffix(buf, agg_postfix, true)) } AggregationDestination::Tag => { let agg_tag_name = tag_replacements.get(&Aggregate::AggregateTag).ok_or(())?.as_bytes(); - let agg_tag_value = tag_replacements.get(agg).ok_or(())?.as_bytes(); + let agg_tag_value = tag_replacements.get(&agg).ok_or(())?.as_bytes(); Ok(self.put_with_fixed_tag(buf, agg_tag_name, agg_tag_value, false)) } AggregationDestination::Both => { - let agg_postfix = postfix_replacements.get(agg).ok_or(())?.as_bytes(); + let agg_postfix = postfix_replacements.get(&agg).ok_or(())?.as_bytes(); let agg_tag_name = tag_replacements.get(&Aggregate::AggregateTag).ok_or(())?.as_bytes(); - let agg_tag_value = tag_replacements.get(agg).ok_or(())?.as_bytes(); + let agg_tag_value = tag_replacements.get(&agg).ok_or(())?.as_bytes(); self.put_with_suffix(buf, agg_postfix, false); self.put_with_fixed_tag(buf, agg_tag_name, agg_tag_value, true); @@ -460,7 +456,7 @@ mod tests { t_reps.insert(Aggregate::AggregateTag, "aa".to_string()); with_tags - .put_with_aggregate(&mut buf, &AggregationDestination::Tag, &Aggregate::Count, &po_reps, &pr_reps, &t_reps) + .put_with_aggregate(&mut buf, AggregationDestination::Tag, Aggregate::Count, &po_reps, &pr_reps, &t_reps) .unwrap(); assert_buf( @@ -472,7 +468,7 @@ mod tests { dbg!("WTF??", &with_tags.name); t_reps.insert(Aggregate::AggregateTag, "cc".to_string()); with_tags - .put_with_aggregate(&mut buf, &AggregationDestination::Tag, &Aggregate::Count, &po_reps, &pr_reps, &t_reps) + .put_with_aggregate(&mut buf, AggregationDestination::Tag, Aggregate::Count, &po_reps, &pr_reps, &t_reps) .unwrap(); assert_buf( @@ -483,7 +479,7 @@ mod tests { t_reps.insert(Aggregate::AggregateTag, "ff".to_string()); with_tags - .put_with_aggregate(&mut buf, &AggregationDestination::Tag, &Aggregate::Count, &po_reps, &pr_reps, &t_reps) + .put_with_aggregate(&mut buf, AggregationDestination::Tag, Aggregate::Count, &po_reps, &pr_reps, &t_reps) .unwrap(); assert_buf( @@ -525,26 +521,26 @@ mod tests { // max is not in replacements assert!( without_tags - .put_with_aggregate(&mut buf, &AggregationDestination::Smart, &Aggregate::Max, &po_reps, &pr_reps, &t_reps) + .put_with_aggregate(&mut buf, AggregationDestination::Smart, Aggregate::Max, &po_reps, &pr_reps, &t_reps) .is_err(), "non existing replacement gave no error" ); // value is aggregated withtout replacements without_tags - .put_with_aggregate(&mut buf, &AggregationDestination::Smart, &Aggregate::Value, &po_reps, &pr_reps, &t_reps) + .put_with_aggregate(&mut buf, AggregationDestination::Smart, Aggregate::Value, &po_reps, &pr_reps, &t_reps) .unwrap(); assert_eq!(&buf.take()[..], &b"gorets.bobez"[..]); // update count is aggregated only with prefix without_tags - .put_with_aggregate(&mut buf, &AggregationDestination::Smart, &Aggregate::UpdateCount, &po_reps, &pr_reps, &t_reps) + .put_with_aggregate(&mut buf, AggregationDestination::Smart, Aggregate::UpdateCount, &po_reps, &pr_reps, &t_reps) .unwrap(); assert_eq!(&buf.take()[..], &b"updates.gorets.bobez"[..]); with_tags - .put_with_aggregate(&mut buf, &AggregationDestination::Smart, &Aggregate::UpdateCount, &po_reps, &pr_reps, &t_reps) + .put_with_aggregate(&mut buf, AggregationDestination::Smart, Aggregate::UpdateCount, &po_reps, &pr_reps, &t_reps) .unwrap(); assert_buf( @@ -555,15 +551,15 @@ mod tests { // different aggregation modes work as intended without_tags - .put_with_aggregate(&mut buf, &AggregationDestination::Smart, &Aggregate::Count, &po_reps, &pr_reps, &t_reps) + .put_with_aggregate(&mut buf, AggregationDestination::Smart, Aggregate::Count, &po_reps, &pr_reps, &t_reps) .unwrap(); assert_eq!(&buf.take()[..], &b"counts.gorets.bobez.count"[..]); without_tags .put_with_aggregate( &mut buf, - &AggregationDestination::Smart, - &Aggregate::Percentile(0.8f64), + AggregationDestination::Smart, + Aggregate::Percentile(0.8f64), &po_reps, &pr_reps, &t_reps, @@ -572,15 +568,15 @@ mod tests { assert_eq!(&buf.take()[..], &b"gorets.bobez.percentile80"[..], "existing postfix replacement was not put"); without_tags - .put_with_aggregate(&mut buf, &AggregationDestination::Name, &Aggregate::Count, &po_reps, &pr_reps, &t_reps) + .put_with_aggregate(&mut buf, AggregationDestination::Name, Aggregate::Count, &po_reps, &pr_reps, &t_reps) .unwrap(); assert_eq!(&buf.take()[..], &b"counts.gorets.bobez.count"[..]); without_tags .put_with_aggregate( &mut buf, - &AggregationDestination::Name, - &Aggregate::Percentile(0.8f64), + AggregationDestination::Name, + Aggregate::Percentile(0.8f64), &po_reps, &pr_reps, &t_reps, @@ -589,14 +585,14 @@ mod tests { assert_eq!(&buf.take()[..], &b"gorets.bobez.percentile80"[..], "existing postfix replacement was not put"); without_tags - .put_with_aggregate(&mut buf, &AggregationDestination::Tag, &Aggregate::Count, &po_reps, &pr_reps, &t_reps) + .put_with_aggregate(&mut buf, AggregationDestination::Tag, Aggregate::Count, &po_reps, &pr_reps, &t_reps) .unwrap(); assert_eq!(&buf.take()[..], &b"counts.gorets.bobez;agg=cnt"[..]); let err = without_tags.put_with_aggregate( &mut buf, - &AggregationDestination::Tag, - &Aggregate::Percentile(0.8f64), + AggregationDestination::Tag, + Aggregate::Percentile(0.8f64), &po_reps, &pr_reps, &t_reps, @@ -604,14 +600,14 @@ mod tests { assert_eq!(err, Err(()), "p80 aggregated into tags whilt it should not"); without_tags - .put_with_aggregate(&mut buf, &AggregationDestination::Both, &Aggregate::Count, &po_reps, &pr_reps, &t_reps) + .put_with_aggregate(&mut buf, AggregationDestination::Both, Aggregate::Count, &po_reps, &pr_reps, &t_reps) .unwrap(); assert_eq!(&buf.take()[..], &b"counts.gorets.bobez.count;agg=cnt"[..]); let err = without_tags.put_with_aggregate( &mut buf, - &AggregationDestination::Both, - &Aggregate::Percentile(0.8f64), + AggregationDestination::Both, + Aggregate::Percentile(0.8f64), &po_reps, &pr_reps, &t_reps, @@ -620,23 +616,23 @@ mod tests { // --------- with_tags assert!(with_tags - .put_with_aggregate(&mut buf, &AggregationDestination::Smart, &Aggregate::Max, &po_reps, &pr_reps, &t_reps) + .put_with_aggregate(&mut buf, AggregationDestination::Smart, Aggregate::Max, &po_reps, &pr_reps, &t_reps) .is_err()); with_tags - .put_with_aggregate(&mut buf, &AggregationDestination::Smart, &Aggregate::Value, &po_reps, &pr_reps, &t_reps) + .put_with_aggregate(&mut buf, AggregationDestination::Smart, Aggregate::Value, &po_reps, &pr_reps, &t_reps) .unwrap(); assert_eq!(&buf.take()[..], &b"gorets.bobez;tag=value"[..]); with_tags - .put_with_aggregate(&mut buf, &AggregationDestination::Smart, &Aggregate::Count, &po_reps, &pr_reps, &t_reps) + .put_with_aggregate(&mut buf, AggregationDestination::Smart, Aggregate::Count, &po_reps, &pr_reps, &t_reps) .unwrap(); assert_eq!(&buf.take()[..], &b"counts.gorets.bobez;agg=cnt;tag=value"[..]); let err = with_tags.put_with_aggregate( &mut buf, - &AggregationDestination::Smart, - &Aggregate::Percentile(0.8f64), + AggregationDestination::Smart, + Aggregate::Percentile(0.8f64), &po_reps, &pr_reps, &t_reps, @@ -644,12 +640,12 @@ mod tests { assert_eq!(err, Err(()), "p80 aggregated into tags whilt it should not"); with_tags - .put_with_aggregate(&mut buf, &AggregationDestination::Name, &Aggregate::Count, &po_reps, &pr_reps, &t_reps) + .put_with_aggregate(&mut buf, AggregationDestination::Name, Aggregate::Count, &po_reps, &pr_reps, &t_reps) .unwrap(); assert_buf(&mut buf, &b"counts.gorets.bobez.count;tag=value"[..], "put tagged metric in name mode"); with_tags - .put_with_aggregate(&mut buf, &AggregationDestination::Tag, &Aggregate::Count, &po_reps, &pr_reps, &t_reps) + .put_with_aggregate(&mut buf, AggregationDestination::Tag, Aggregate::Count, &po_reps, &pr_reps, &t_reps) .unwrap(); assert_buf( &mut buf, @@ -659,8 +655,8 @@ mod tests { let err = with_tags.put_with_aggregate( &mut buf, - &AggregationDestination::Tag, - &Aggregate::Percentile(0.8f64), + AggregationDestination::Tag, + Aggregate::Percentile(0.8f64), &po_reps, &pr_reps, &t_reps, @@ -668,7 +664,7 @@ mod tests { assert_eq!(err, Err(()), "p80 aggregated into tags whilt it should not"); with_tags - .put_with_aggregate(&mut buf, &AggregationDestination::Both, &Aggregate::Count, &po_reps, &pr_reps, &t_reps) + .put_with_aggregate(&mut buf, AggregationDestination::Both, Aggregate::Count, &po_reps, &pr_reps, &t_reps) .unwrap(); assert_buf( &mut buf, @@ -678,8 +674,8 @@ mod tests { let err = with_tags.put_with_aggregate( &mut buf, - &AggregationDestination::Both, - &Aggregate::Percentile(0.8f64), + AggregationDestination::Both, + Aggregate::Percentile(0.8f64), &po_reps, &pr_reps, &t_reps, @@ -688,12 +684,12 @@ mod tests { // ensure trailing semicolon is not duplicated with_semicolon - .put_with_aggregate(&mut buf, &AggregationDestination::Smart, &Aggregate::Value, &po_reps, &pr_reps, &t_reps) + .put_with_aggregate(&mut buf, AggregationDestination::Smart, Aggregate::Value, &po_reps, &pr_reps, &t_reps) .unwrap(); assert_eq!(&buf.take()[..], &b"gorets.bobez;"[..]); with_semicolon - .put_with_aggregate(&mut buf, &AggregationDestination::Smart, &Aggregate::Count, &po_reps, &pr_reps, &t_reps) + .put_with_aggregate(&mut buf, AggregationDestination::Smart, Aggregate::Count, &po_reps, &pr_reps, &t_reps) .unwrap(); assert_eq!(&buf.take()[..], &b"counts.gorets.bobez;agg=cnt"[..]); } From 71b74cd596e79b826198773cfc721fa3d98831d4 Mon Sep 17 00:00:00 2001 From: Sergey Noskov Date: Mon, 16 Dec 2019 07:54:37 +0300 Subject: [PATCH 21/23] Match aggregates case insensitive --- src/aggregate.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aggregate.rs b/src/aggregate.rs index e0a339e..674758e 100644 --- a/src/aggregate.rs +++ b/src/aggregate.rs @@ -77,7 +77,7 @@ where type Error = String; fn try_from(s: String) -> Result { - match s.as_str() { + match s.to_lowercase().as_str() { "count" => Ok(Aggregate::Count), "last" => Ok(Aggregate::Last), "min" => Ok(Aggregate::Min), From 41f82c5eb4cc627d6b402f88b1262170c89d5ba7 Mon Sep 17 00:00:00 2001 From: Sergey Noskov Date: Mon, 16 Dec 2019 09:09:02 +0300 Subject: [PATCH 22/23] Edit changelog --- CHANGELOG.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c79e11..6701b71 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # 0.2.0 +* aggregation API has changed, supporting custom percentiles, specifying a list of aggregates, etc * metric name is now a separate structure allowing to process tag information -* tags are considered being graphite format and can be sorted so tagged metrics are matched correctly together -* metric aggregation iterator now returns suffixes without leading dot for better usage as tags +* metrics can now have tags in (yet only one) graphite format +* tags are placed in sorted order during aggregation and parsing so they are always correct to be used without creating separate structure for them +* aggregation modes are not supported to put aggregate names in tags or name postfixes +* name prefixing is available per aggregate From fa370650f75162ad0146452933514e3b1d4bf4eb Mon Sep 17 00:00:00 2001 From: Sergey Noskov Date: Wed, 15 Jan 2020 07:56:16 +0300 Subject: [PATCH 23/23] Fix documentation --- Cargo.toml | 2 +- src/aggregate.rs | 8 ++++++-- src/lib.rs | 14 ++++++++++++++ src/metric.rs | 1 + src/name.rs | 13 ++++++------- src/parser.rs | 16 ++++++++-------- 6 files changed, 36 insertions(+), 18 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 477651e..b76029b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,7 @@ description = "Library for handling metrics in bioyino and related projects" edition = "2018" license = "MIT" repository = "https://github.com/Albibek/bioyino-metric" -keywords = ["statsd", "bioyino", "metric", "metrics"] +keywords = ["statsd", "bioyino", "metric", "metrics", "graphite", "tags"] categories = ["parser-implementations", "network-programming"] [dependencies] diff --git a/src/aggregate.rs b/src/aggregate.rs index 674758e..eba64df 100644 --- a/src/aggregate.rs +++ b/src/aggregate.rs @@ -8,8 +8,8 @@ use serde_derive::{Deserialize, Serialize}; use crate::metric::{FromF64, Metric, MetricType}; -// Percentile counter. Not safe. Requires at least two elements in vector -// vector must be sorted +/// Percentile counter. Not safe. Requires at least two elements in vector +/// vector MUST be sorted pub fn percentile(vec: &[F], nth: F) -> F where F: Float + AsPrimitive, @@ -51,11 +51,13 @@ where #[derive(Debug, Clone, Copy, Serialize, Deserialize)] #[serde(rename_all = "kebab-case", deny_unknown_fields, try_from = "String")] +/// Contains list of all possible aggregates and some additional data. pub enum Aggregate where F: Copy + Float + Debug + FromF64 + AsPrimitive, { #[serde(skip)] + /// a dummy aggregate, containing metric's original value Value, Count, Last, @@ -66,6 +68,8 @@ where Mean, UpdateCount, #[serde(skip)] + /// this is not really a metric aggregate, it is used in replacements definition to specify + /// tag name AggregateTag, Percentile(F), } diff --git a/src/lib.rs b/src/lib.rs index eb91ea2..c2bab6a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,11 +1,25 @@ +//! This crate contains useful types and methods for working with metrics in bioyino statsd server and some other +//! metric processing software. Features: +//! +//! * a type for representing typed and timestamped metrics, generic over floating point format +//! * streaming parser of statsd format +//! * metric aggregation routines +//! * working with Graphite-compatible metric naming including basic tags support +//! * schema and functions for sending/receiving metrics in binary Cap'n'Proto format + +/// Aggregation routines pub mod aggregate; +/// Metric values routines pub mod metric; +/// Metric name routines pub mod name; +/// Metric parsing routines pub mod parser; pub use crate::metric::*; pub use crate::name::MetricName; +/// An autogenerated module for working with capnp schema pub mod protocol_capnp { include!(concat!(env!("OUT_DIR"), "/schema/protocol_capnp.rs")); } diff --git a/src/metric.rs b/src/metric.rs index 0a3841b..6580679 100644 --- a/src/metric.rs +++ b/src/metric.rs @@ -45,6 +45,7 @@ where } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +/// A typed, optionally timestamped metric value (i.e. without name) pub struct Metric where F: Copy + PartialEq + Debug, diff --git a/src/name.rs b/src/name.rs index 8408909..dae8cb8 100644 --- a/src/name.rs +++ b/src/name.rs @@ -87,8 +87,8 @@ pub(crate) fn sort_tags(name: &mut [u8], mode: TagFormat, intermediate: &mut [u8 } } -/// Contains buffer containing the full metric name including tags -/// and some data to work with tags +/// Represents a metric name as a buffer containing the full metric name including tags. +/// Also provides methods to work with tags. #[derive(Debug, Eq, Clone)] pub struct MetricName { pub name: Bytes, @@ -129,7 +129,7 @@ impl MetricName { } /// Assemble name from internal parts *without checks*. Tags must be sorted, tag position must - /// be found according to mode + /// be found according to required format pub fn from_raw_parts(name: Bytes, tag_pos: Option) -> Self { Self { name, tag_pos } } @@ -302,10 +302,9 @@ impl MetricName { }; } - /// puts a name with an aggregate to provided buffer depending on dest - /// to avoid putting different aggregates into same names - /// requires all replacements to exist, giving error otherwise - /// does no checks on overriding though + /// Puts a name with an aggregate to provided buffer depending on dest. + /// To avoid putting different aggregates into same names requires all replacements to exist, giving error otherwise. + /// Does no do the check if such overriding is done. #[allow(clippy::unit_arg)] pub fn put_with_aggregate( // rustfmt diff --git a/src/parser.rs b/src/parser.rs index 58bb8a6..3978460 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -30,11 +30,9 @@ where TotalTrash(PointerOffset), } -/// The parser signature may seem cryptic, but it can mostly be ignored totally and get used as -/// simple as -/// (TODO: code example) -/// -/// It's current goal is to be fast, use less allocs and to not depend on error and even probably input typing +// The current goal is to be fast, use less allocs and to not depend on error type. that's why +// the signature may seem to be cryptic. +/// Parse stream of multiple metrics in statsd format. Usage of MetricParser is recommended instead. pub fn metric_stream_parser<'a, I, F>( max_unparsed: usize, max_tags_len: usize, @@ -146,16 +144,18 @@ where pub type MetricParsingError<'a> = easy::Errors; #[allow(unused_variables)] +/// Used to handle parsing errors pub trait ParseErrorHandler { fn handle(&self, buf: &[u8], pos: usize, e: MetricParsingError) {} } +/// Does nothing about error, can be used for ignoring all errors pub struct DummyParseErrorHandler; impl ParseErrorHandler for DummyParseErrorHandler {} -// A high level parser to parse metric and split names from BytesMut -// Follows an iterator pattern, which fires metrics untion it is possible -// modifying the buffer on the fly +/// A high level parser to parse metric and split names from BytesMut. +/// Follows an iterator pattern, which fires metrics until it is possible, +/// modifying the buffer on the fly pub struct MetricParser<'a, F, E: ParseErrorHandler> { input: &'a mut BytesMut, skip: usize,