Skip to content

Commit

Permalink
Added support for categorical data.
Browse files Browse the repository at this point in the history
  • Loading branch information
jfjlaros committed Aug 20, 2022
1 parent c2cd3ca commit 8308735
Show file tree
Hide file tree
Showing 10 changed files with 198 additions and 79 deletions.
10 changes: 5 additions & 5 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,10 @@ This library provides a simple way to parse line based data.
**Features:**

- Easy to use.
- Works with all types of delimiters and line endings.
- Works with arbitrary boolean value notation.
- Works with integers in any base.
- Support for all types of delimiters and line endings.
- Support for arbitrary boolean value notation.
- Support for categorical data.
- Support for integers in any base.
- Tiny overhead compared to dedicated solutions.
- Type safe.

Expand Down Expand Up @@ -64,8 +65,7 @@ If the fields have different types, we can use multiple variables.
char a[4];
int b;
double c;
Number<int, 16> d; // Hexadecimal number.
parser.parseLine("one, 2, 3.4, 0x38", a, b, c, d);
parser.parseLine("one, 2, 3.4", a, b, c);
.. _ReadTheDocs: https://arduinotextparser.readthedocs.io
17 changes: 15 additions & 2 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,27 @@ Type definitions
.. doxygentypedef:: ccpc


Class definitions
-----------------
Functions
---------

.. doxygenfunction:: strmatch


Types
-----

.. doxygenstruct:: Number
:members:

.. doxygenstruct:: Bool
:members:

.. doxygenstruct:: Category
:members:


Class definitions
-----------------

.. doxygenclass:: TextParser
:members:
2 changes: 0 additions & 2 deletions docs/introduction.rst
Original file line number Diff line number Diff line change
Expand Up @@ -98,5 +98,3 @@ If this library does not meet your requirements, then perhaps the

.. _`delimiter separated values`: https://en.wikipedia.org/wiki/Delimiter-separated_values
.. _`CSV parser for Arduino`: https://github.com/michalmonday/CSV-Parser-for-Arduino


24 changes: 21 additions & 3 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,26 @@ This string can then be used to create a variable of type ``Bool``.
parser.parseLine("YES", a); // `a.value` contains `false`.
Categorical data
----------------

For categorical data, we need to define a global, null-terminated list of
labels.

.. code-block:: cpp
char const* labels[] = {"red", "green", "blue", nullptr};
These labels can then be used to create a variable of type ``Category``.

.. code-block:: cpp
Category<int, labels> a;
parser.parseLine("red", a); // `a.value` contains 0.
parser.parseLine("blue", a); // `a.value` contains 2.
parser.parseLine("yellow", a); // `a.value` contains -1.
Integers in other bases
-----------------------

Expand All @@ -95,9 +115,7 @@ Integers in arbitrary bases are supported via the `Number` type.
Examples
--------

Please see the demo_ sketch for an example of basic usage and the multilevel_
sketch for a more complicated example.
Please see the demo_ sketch for an example of basic usage.


.. _demo: https://github.com/jfjlaros/textparser/blob/master/examples/demo/demo.ino
.. _multilevel: https://github.com/jfjlaros/textparser/blob/master/examples/multilevel/multilevel.ino
2 changes: 1 addition & 1 deletion examples/benchmark/benchmark.ino
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ void lib() {

// Benchmark entry point. Both variants are left commented out; the trailing
// figures record the program storage and dynamic memory reported for each.
// NOTE(review): presumably one call is enabled at a time to compare the
// footprint with and without the library — confirm.
void setup() {
//noLib(); // 2538 bytes program storage space, 204 bytes dynamic memory.
//lib(); // 2604 bytes program storage space, 206 bytes dynamic memory.
//lib(); // 2620 bytes program storage space, 206 bytes dynamic memory.
}

// Intentionally empty.
void loop() {}
61 changes: 54 additions & 7 deletions examples/demo/demo.ino
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
#include <textparser.h>

// Text that `Bool<truth>` treats as `true` (see demo5).
char const truth[] = "Yes";
// Labels used by `Category<int, labels>` (see demo7); the trailing `nullptr`
// marks the end of the list.
char const* labels[] = {"red", "green", "blue", nullptr};


void demo1() {
// If all fields are of the same type, we can use an array.
TextParser parser(", ");
int a[5];
parser.parseLine("1, 2, 3, 4, 5", a);

parser.parseLine("1, 2, 3, 4, 5", a);
for (int const& i: a) {
Serial.print(i);
Serial.print(' ');
Expand All @@ -20,24 +23,21 @@ void demo2() {
char a[10];
int b;
double c;
Number<int, 16> d; // Hexadecimal number.
parser.parseLine("one, 2, 3.4", a, b, c, d);

parser.parseLine("one, 2, 3.4", a, b, c);
Serial.print(a);
Serial.print(' ');
Serial.print(b);
Serial.print(' ');
Serial.println(c);
Serial.print(' ');
Serial.println(d.value);
}

void demo3() {
  // Line endings can be removed by providing a line delimiter.
  TextParser parser(", ", "\r\n");
  char a[10];

  // Parse the line once; the "\r\n" line delimiter is stripped from the
  // field, so the '.' printed below directly follows the text.
  parser.parseLine("abc\r\n", a);
  Serial.print(a);
  Serial.println('.');
}
Expand All @@ -46,15 +46,59 @@ void demo4() {
// A two dimensional array can be used to extract substrings.
TextParser parser(" ", ".");
char words[5][6];
parser.parseLine("This is a nice line.", words);

parser.parseLine("This is a nice line.", words);
for (char* const w: words) {
Serial.print(w);
Serial.print(' ');
}
Serial.println();
}

void demo5() {
// `Bool` type for easy decoding.
TextParser parser(", ", "\r\n");
bool a[2];
Bool<truth> b[2]; // Text based booleans.

parser.parseLine("0, 1, No, Yes", a, b);
Serial.print(a[0]);
Serial.print(' ');
Serial.print(a[1]);
Serial.print(' ');
Serial.print(b[0].value);
Serial.print(' ');
Serial.println(b[1].value);
}

void demo6() {
// `Number` type for easy decoding.
TextParser parser(", ", "\r\n");
int a;
Number<int, 16> b; // Hexadecimal number.
Number<int, 2> c; // Binary number.

parser.parseLine("1, 0x17, 101101", a, b, c);
Serial.print(a);
Serial.print(' ');
Serial.print(b.value);
Serial.print(' ');
Serial.println(c.value);
}

void demo7() {
// `Category` type for easy decoding.
TextParser parser(", ", "\r\n");
Category<int, labels> a[3];

parser.parseLine("red, blue, yellow", a);
Serial.print(a[0].value);
Serial.print(' ');
Serial.print(a[1].value);
Serial.print(' ');
Serial.println(a[2].value);
}


void setup() {
Serial.begin(9600);
Expand All @@ -63,6 +107,9 @@ void setup() {
demo2();
demo3();
demo4();
demo5();
demo6();
demo7();
}

// Intentionally empty; the demos are run once from setup().
void loop() {}
28 changes: 0 additions & 28 deletions examples/multilevel/multilevel.ino

This file was deleted.

8 changes: 8 additions & 0 deletions src/textparser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@ ccp TextParser::findEnd_(ccp line) const {
}


// Compare the character range [begin, end) with the zero terminated string
// `str`. The result is `true` only if both have the same length and the same
// content.
bool strmatch(ccpc begin, ccpc end, ccpc str) {
  ccp rangePos = begin;
  ccp strPos = str;

  // Advance both cursors while the range is not exhausted, `str` has not
  // ended and the current characters agree.
  while (rangePos < end && *strPos && *rangePos == *strPos) {
    ++rangePos;
    ++strPos;
  }

  // A match requires that the range and `str` were consumed completely.
  if (rangePos != end) {
    return false;
  }
  return !*strPos;
}


void TextParser::parse(char& result, ccpc begin, ccpc end) const {
result = 0;
if (begin < end) {
Expand Down
46 changes: 42 additions & 4 deletions src/textparser.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,26 @@ struct Bool {
bool value; //!< Value.
};

/*! Category.
*
* Holds the result of matching a field against a list of labels: the index of
* the first matching label, or -1 if no label matches.
*
* \tparam T Integer type.
* \tparam labels Labels; the list must end with a null pointer.
*/
template <class T, ccp* labels>
struct Category {
T value; //!< Value.
};


/*! String comparison.
*
* Compares the characters in the range [`begin`, `end`) with the zero
* terminated string `str`.
*
* \param[in] begin Pointer to the first C string.
* \param[in] end Pointer to end of the first C string.
* \param[in] str Pointer to the second C string.
*
* \return `true` if the range and `str` have identical length and content,
*   `false` otherwise.
*/
bool strmatch(ccpc begin, ccpc end, ccpc str);


/*! Line based text parser. */
class TextParser {
ccpc delimiter_;
Expand Down Expand Up @@ -97,6 +117,15 @@ class TextParser {
template <class T, size_t base>
void parse(Number<T, base>& result, ccpc begin, ccpc end) const;

/*!
* \tparam T Integer type.
* \tparam labels Labels.
*
* \copydoc parse(char&, ccpc, ccpc) const
*/
template <class T, ccp* labels>
void parse(Category<T, labels>& result, ccpc begin, ccpc end) const;

/*!
* \tparam T Integer type.
*
Expand Down Expand Up @@ -160,10 +189,19 @@ void TextParser::parse(char (&result)[n], ccpc begin, ccpc end) const {

// Parse a text based boolean: `result.value` becomes `true` only if the field
// [begin, end) is identical to the `truth` string.
template <ccp truth>
void TextParser::parse(Bool<truth>& result, ccpc begin, ccpc end) const {
  // The comparison is delegated to `strmatch`; the former hand written loop
  // computed the same value and was overwritten immediately afterwards.
  result.value = strmatch(begin, end, truth);
}

// Parse a categorical field: `result.value` becomes the index of the first
// label that is identical to the field [begin, end), or -1 if there is none.
template <class T, ccp* labels>
void TextParser::parse(
    Category<T, labels>& result, ccpc begin, ccpc end) const {
  // Walk the label list up to its terminating null pointer.
  size_t index = 0;
  while (labels[index]) {
    if (strmatch(begin, end, labels[index])) {
      result.value = index;
      return;
    }
    ++index;
  }

  // No label matched.
  result.value = -1;
}

template <class T, size_t base>
Expand Down
Loading

0 comments on commit 8308735

Please sign in to comment.