From 262fa1420cfd93bf2af0a210d560e7282c60ac81 Mon Sep 17 00:00:00 2001 From: Ian Watson Date: Thu, 21 Nov 2024 18:30:52 -0500 Subject: [PATCH] linear_fingerprint: ring bits --- contrib/bin/.gitignore | 1 + docs/Molecule_Tools/grep_molecule.md | 7 + docs/Molecule_Tools/iwdescr.md | 3 +- .../iwmisc/proto_for_testing.proto | 24 ++++ src/Foundational/iwstring/BUILD | 13 ++ src/Foundational/iwstring/iwstring.h | 18 +++ .../iwstring/tokenise_with_quotes.cc | 105 ++++++++++++++ .../iwstring/tokenise_with_quotes_test.cc | 60 ++++++++ src/Molecule_Lib/linear_fingerprint.cc | 128 ++++++++++++------ src/Molecule_Lib/linear_fingerprint.h | 2 + src/Molecule_Tools/grep_molecule.cc | 6 +- src/Molecule_Tools/linear_fingerprint_main.cc | 50 ++++--- src/Utilities/General/iwcut.cc | 39 +++--- 13 files changed, 370 insertions(+), 86 deletions(-) create mode 100644 contrib/bin/.gitignore create mode 100644 src/Foundational/iwmisc/proto_for_testing.proto create mode 100644 src/Foundational/iwstring/tokenise_with_quotes.cc create mode 100644 src/Foundational/iwstring/tokenise_with_quotes_test.cc diff --git a/contrib/bin/.gitignore b/contrib/bin/.gitignore new file mode 100644 index 00000000..ebf4281d --- /dev/null +++ b/contrib/bin/.gitignore @@ -0,0 +1 @@ +!lib diff --git a/docs/Molecule_Tools/grep_molecule.md b/docs/Molecule_Tools/grep_molecule.md index bf50fa36..d4337988 100644 --- a/docs/Molecule_Tools/grep_molecule.md +++ b/docs/Molecule_Tools/grep_molecule.md @@ -125,6 +125,13 @@ Worst case would be finding a set of molecules in itself, which would require the full unique smiles computation. Nevertheless, doing this on 20k random molecules takes just 2.7 seconds. +## Options +Chirality can be removed from both the needle molecule and the molecules to +be searched by adding the -c option. + +Molecules can be reduced to the largest fragment via the -l option. Again, +the transformation is applied to both the needle and the haystack molecules.
+ ## Further Optimisation The number of chiral centres could be included with the aromatic molecular formula, or perhaps discerned from the starting smiles. If chirality is being considered diff --git a/docs/Molecule_Tools/iwdescr.md b/docs/Molecule_Tools/iwdescr.md index 4d91d0a3..2015f335 100644 --- a/docs/Molecule_Tools/iwdescr.md +++ b/docs/Molecule_Tools/iwdescr.md @@ -16,7 +16,6 @@ see various missing columns. ## Descriptors. The following descriptors are computed. -| ---- | ---------- | | name | definition | | ---- | ---------- | | natoms | the number of atoms in the molecule | @@ -323,7 +322,7 @@ off all optional descriptors. This can make a big difference in run times. Runni ``` iwdescr.sh -O all file.smi > file.w ``` -takes 5.7 seconds to process 20k molecules, generating 264 columns of ouput. Running +takes 5.7 seconds to process 20k molecules, generating 277 columns of output. Running ``` iwdescr.sh -O none file.smi > file.w ``` diff --git a/src/Foundational/iwmisc/proto_for_testing.proto b/src/Foundational/iwmisc/proto_for_testing.proto new file mode 100644 index 00000000..ff04c0b5 --- /dev/null +++ b/src/Foundational/iwmisc/proto_for_testing.proto @@ -0,0 +1,24 @@ +syntax = "proto3"; + +package for_testing; + +message SubMessage { + optional int32 i1 = 1; + optional string str1 = 2; +} + +message TestMessage { + optional string str1 = 1; + optional string str2 = 2; + + optional int32 i1 = 3; + optional uint32 ui1 = 4; + optional float x = 5; + + repeated int32 int_array = 6; + repeated float float_array = 7; + + repeated string repeated_string = 8; + + optional SubMessage sub_message = 9; +} diff --git a/src/Foundational/iwstring/BUILD b/src/Foundational/iwstring/BUILD index 055acf7b..1ba5d85e 100644 --- a/src/Foundational/iwstring/BUILD +++ b/src/Foundational/iwstring/BUILD @@ -28,6 +28,7 @@ cc_library( "remove_suffix.cc", "string_change.cc", "string_relationals.cc", + "tokenise_with_quotes.cc", "unhtml.cc", ], hdrs = [ @@ -88,3 +89,15 @@ cc_test( ],
timeout="short", ) + +cc_test( + name = "tokenise_with_quotes_test", + srcs = [ + "tokenise_with_quotes_test.cc", + ], + deps = [ + ":iwstring", + "@googletest//:gtest", + "@googletest//:gtest_main", + ], +) diff --git a/src/Foundational/iwstring/iwstring.h b/src/Foundational/iwstring/iwstring.h index 1349dc1e..282a8c31 100644 --- a/src/Foundational/iwstring/iwstring.h +++ b/src/Foundational/iwstring/iwstring.h @@ -1023,6 +1023,24 @@ Equals(const const_IWSubstring& lhs, const std::string_view& rhs) { return 0 == ::strncmp(lhs.data(), rhs.data(), lhs.length()); } +// Used for reading records from tabular files where there might +// be quoted tokens. +// For each token encountered, add to `tstart` and `tstop` the +// start and stop for that token - excluding quotes. +// The output can be processed with something like: +// +// int ntokens = TokeniseWithQuotes(buffer, ',', _tstart, _tstop); +// if (ntokens < 0) .... fail. +// for (int i = 0; i < ntokens; ++i) { +// int b = _tstart[i]; +// int e = _tstop[i]; +// const_IWSubstring token(buffer.rawdata() + b, e - b); +int +TokeniseWithQuotes(const const_IWSubstring& buffer, + char sep, + resizable_array& tstart, + resizable_array& tstop); + } // namespace iwstring inline std::ostream & diff --git a/src/Foundational/iwstring/tokenise_with_quotes.cc b/src/Foundational/iwstring/tokenise_with_quotes.cc new file mode 100644 index 00000000..3d749350 --- /dev/null +++ b/src/Foundational/iwstring/tokenise_with_quotes.cc @@ -0,0 +1,105 @@ +#include + +#include "Foundational/iwstring/iwstring.h" + +namespace iwstring { + +using std::cerr; + +constexpr int kInvalid = -1; + +int +TokeniseWithQuotes(const const_IWSubstring& buffer, + char sep, + resizable_array& tstart, + resizable_array& tstop) { + tstart.resize_keep_storage(0); + tstop.resize_keep_storage(0); + + static constexpr char kDQuote = '"'; + + const int nchars = buffer.length(); + if (nchars == 0) { + return 0; + } + + // Maybe allow for empty token at start? 
+ if (buffer[0] == sep) { + return kInvalid; + } + + bool inside_quoted_string = false; + int ntokens = 1; + + if (buffer[0] == kDQuote) { + tstart << 1; + inside_quoted_string = true; + } else { + tstart << 0; + } + + for (int i = 1; i < nchars; ++i) { + const char c = buffer[i]; + char next_char; + if (i == nchars - 1) { + next_char = '\0'; + } else { + next_char = buffer[i + 1]; + } + + if (inside_quoted_string) { + if (c == kDQuote && (next_char == sep || next_char == '\0')) { + inside_quoted_string = false; + } + } else if (c == sep) { + if (buffer[i-1] == kDQuote) { + tstop << (i - 1); + } else { + tstop << (i - 0); + } + if (next_char == kDQuote) { + tstart << (i + 2); + } else { + tstart << (i + 1); + } + ++ntokens; + } else if (c == kDQuote && buffer[i-1] == sep) { + inside_quoted_string = true; + } + } + + if (inside_quoted_string) { + cerr << "TokeniseWithQuotes:unclosed quote '" << buffer << "'\n"; + return kInvalid; + } + + if (buffer.ends_with(kDQuote)) { + tstop << (buffer.length() - 1); + } else { + tstop << (buffer.length() - 0); + } + + if (tstart.size() != tstop.size()) { + cerr << "TokeniseWithQuotes::Mismatch between opening and closing tokens\n"; + cerr << tstart.size() << " vs " << tstop.size() << '\n'; + return kInvalid; + } + + if (tstart.number_elements() != ntokens) { + cerr << "TokeniseWithQuotes:Mismatch btw tokens " << ntokens << + " and array size " << tstart.size() << '\n'; + return kInvalid; + } + +// #define DEBUG_TOKENISE_WITH_QUOTES +#ifdef DEBUG_TOKENISE_WITH_QUOTES + cerr << "Found " << ntokens << " tokens\n"; + for (int i = 0; i < tstart.number_elements(); ++i) { + cerr << ' ' << i << " start " << tstart[i] << ' ' << buffer[tstart[i]] << + " stop " << tstop[i] << ' ' << buffer[tstop[i]] << '\n'; + } +#endif + + return ntokens; +} +} // namespace iwstring diff --git a/src/Foundational/iwstring/tokenise_with_quotes_test.cc b/src/Foundational/iwstring/tokenise_with_quotes_test.cc new file mode 100644 index 00000000..e190e865 --- 
/dev/null +++ b/src/Foundational/iwstring/tokenise_with_quotes_test.cc @@ -0,0 +1,60 @@ + +#include "googlemock/include/gmock/gmock.h" +#include "googletest/include/gtest/gtest.h" + +#include "iwstring.h" + +namespace { + +using iwstring::TokeniseWithQuotes; + +struct Data { + IWString buffer; + char sep; + int ntokens; + std::vector expected; +}; + +class TestTokenise: public testing::TestWithParam { + protected: + resizable_array _tstart; + resizable_array _tstop; +}; + +TEST_P(TestTokenise, TestTokenise) { + const auto params = GetParam(); + EXPECT_EQ(TokeniseWithQuotes(params.buffer, params.sep, _tstart, _tstop), params.ntokens) << + params.buffer; + + // Expected failure encountered, cannot extract matching tokens. + if (params.ntokens < 0) { + return; + } + + for (int i = 0; i < params.ntokens; ++i) { + int b = _tstart[i]; + int e = _tstop[i]; + // std::cerr << "b " << b << " e " << e << '\n'; + const_IWSubstring token(params.buffer.rawdata() + b, e - b); + EXPECT_EQ(params.expected[i], token) << i << " mismatch '" << params.expected[i] << + "' got '" << token << "' in " << params.buffer; + } +} +INSTANTIATE_TEST_SUITE_P(TestTokenise, TestTokenise, testing::Values( + Data{"a,b", ',', 2, {"a", "b"}}, + Data{"aa,b", ',', 2, {"aa", "b"}}, + Data{"aa,bb", ',', 2, {"aa", "bb"}}, + Data{"aaa,bb", ',', 2, {"aaa", "bb"}}, + Data{R"("a","b")", ',', 2, {"a", "b"}}, + Data{R"("a a","b")", ',', 2, {"a a", "b"}}, + Data{R"("a a","b b")", ',', 2, {"a a", "b b"}}, + Data{R"(a,"b b")", ',', 2, {"a", "b b"}}, + Data{R"("a a",b)", ',', 2, {"a a", "b"}}, + Data{R"("a,a",b)", ',', 2, {"a,a", "b"}}, + Data{R"("a,a",,b)", ',', 3, {"a,a", "", "b"}}, + Data{R"("a,a",,b,)", ',', 4, {"a,a", "", "b", ""}}, + Data{R"(,"a,a",,b,)", ',', -1, {"", "a,a", "", "b", ""}} +)); + + +} // namespace diff --git a/src/Molecule_Lib/linear_fingerprint.cc b/src/Molecule_Lib/linear_fingerprint.cc index d055b73f..49064af1 100644 --- a/src/Molecule_Lib/linear_fingerprint.cc +++ 
b/src/Molecule_Lib/linear_fingerprint.cc @@ -12,7 +12,7 @@ using std::endl; namespace internal { -constexpr int exclude_atom = -1; +constexpr int kExcludeAtom = -1; Options::Options () { @@ -89,7 +89,7 @@ LinearFpStatus::LinearFpStatus(const Options& opt, const Molecule& m, if (include_atom[i]) _atom_in_path[i] = 0; else - _atom_in_path[i] = exclude_atom; + _atom_in_path[i] = kExcludeAtom; } for (int i = 0; i < _nedges; ++i) { @@ -133,6 +133,26 @@ LinearFpStatus::~LinearFpStatus() return; } +int +LinearFpStatus::DebugPrint(std::ostream& output) const { + output << "LinearFpStatus:path length " << _path_length << '\n'; + if (_path_length == 0) { + return output.good(); + } + + output << "0 atom " << _path_index[0] << '\n'; + for (int i = 1; i < _path_length; i += 2) { + output << i << " bond " << _path_index[i] << " atom " << _path_index[i + 1] << '\n'; + } + + for (int i = 0; i < _matoms; ++i) { + output << " atom " << i << " in path " << _atom_in_path[i] << '\n'; + } + + + return output.good(); +} + uint64_t LinearFpStatus::_BondHash(const Bond& b) const { if (b.is_aromatic()) @@ -160,6 +180,7 @@ LinearFpStatus::_AddBondToPath(const Bond & b, const atom_number_t next_atom) { #ifdef DEBUG_LINEAR_FP cerr << "At length " << _path_length << " adding bond number " << bond_number << " value " << _bond_constant[bond_number] << endl; + cerr << "_AddBondToPath adding atom " << next_atom << " length " << _path_length << '\n'; #endif _path[_path_length] = _bond_constant[bond_number]; @@ -186,6 +207,9 @@ LinearFpStatus::_PopPath() { const int atom_number = _path_index[_path_length]; assert(_atom_in_path[atom_number]); _atom_in_path[atom_number]--; +#ifdef DEBUG_EXPAND + cerr << "_PopPath removing atom " << atom_number << " at length " << _path_length << '\n'; +#endif _path_length--; @@ -223,13 +247,14 @@ LinearFpStatus::Fingerprint() { return 1; } - if (nullptr != _stream_for_bit_meanings) + if (nullptr != _stream_for_bit_meanings) { _WriteLabelledSmiles(); + } - for (int i = 
0; i < _matoms; ++i) - { - if (exclude_atom == _atom_in_path[i]) + for (int i = 0; i < _matoms; ++i) { + if (kExcludeAtom == _atom_in_path[i]) { continue; + } _StartPath(i); _MaybeFormBit(); @@ -249,37 +274,56 @@ LinearFpStatus::Fingerprint() { void LinearFpStatus::_Expand() { - if (_path_length / 2 >= _options._max_length) + if (_path_length / 2 >= _options._max_length) { return; + } - const atom_number_t a1 = _path_index[_path_length - 1]; + // Extract to single variable to avoid complex comparisons in the loop. + bool rings_or_crossing_paths; + if (_path_length / 2 < 3) { + rings_or_crossing_paths = false; + } else { + rings_or_crossing_paths = (_options._fingerprint_ring_presence || + _options._paths_can_cross); + } - const Atom * a = _atom[a1]; + const atom_number_t a1 = _path_index[_path_length - 1]; +#ifdef DEBUG_EXPAND + cerr << "Expand from atom " << a1 << '\n'; + DebugPrint(cerr); +#endif - const int acon = a->ncon(); + const Atom* a = _atom[a1]; - for (int i = 0; i < acon; ++i) { - const Bond * b = a->item(i); - if (_bond_in_path[b->bond_number()]) + for (const Bond* b : *a) { + if (_bond_in_path[b->bond_number()]) { // catches return to previous atom. continue; + } const atom_number_t a2 = b->other(a1); - if (exclude_atom == _atom_in_path[a2]) + if (kExcludeAtom == _atom_in_path[a2]) { continue; + } - bool a2_already_in_path; - if (!_atom_in_path[a2]) // The easy case - a2_already_in_path = false; - else if (_options._fingerprint_ring_presence || - _options._paths_can_cross) - a2_already_in_path = true; - else // Avoid placed atom. + bool a2_already_in_path = false; + if (! _atom_in_path[a2]) { + // New atom, great. + } else if (! rings_or_crossing_paths) { + // In path, but not doing anything with rings or crossing paths. continue; + } else { // Already in path, maybe ring and/or crossing path. + if (_options._fingerprint_ring_presence) { + _AddBondToPath(*b, a2); + _FormRingBit(); + _PopPath(); + } + // TODO:ianwatson implement crossing path idea. 
+ continue; + } _AddBondToPath(*b, a2); _MaybeFormBit(); - if (a2_already_in_path) - { + if (a2_already_in_path) { if (_options._fingerprint_ring_presence) _FormRingBit(); if (!_options._paths_can_cross) @@ -346,34 +390,32 @@ LinearFpStatus::_MaybeFormBit() { // Atom at end of path occurs somewhere previously. Find it. void -LinearFpStatus::_FormRingBit() -{ +LinearFpStatus::_FormRingBit() { const int target = _path_index[_path_length - 1]; - int first_index = -1; - for (int i = 0; i < (_path_length - 1); i += 2) - { - if (_path_index[i] == target) - { - first_index = i; + int last_index = -1; + for (int i = _path_length -3; i >= 0; i -= 2) { + if (_path_index[i] == target) { + last_index = i; break; } } - if (first_index < 0) - { - cerr << "LinearFpStatus:_FormRingBit:first occurrence not found\n"; + if (last_index < 0) { + cerr << "LinearFpStatus:_FormRingBit:first occurrence not found, target " << target << '\n'; _PrintPath(cerr); return; } - uint64_t t1 = _path[first_index]; + uint64_t t1 = _path[last_index]; uint64_t t2 = _path[_path_length - 1]; - if (t1 < t2) + if (t1 < t2) { std::swap(t1, t2); + } - _sfc.hit_bit(_magic1 * t1 + (_path_length - first_index) * (t2 + _magic2)); + // Open question, should we include the bond type in this calculation? + _sfc.hit_bit(_magic1 * t1 + (_path_length - last_index) * (t2 + _magic2)); } void @@ -410,20 +452,22 @@ LinearFpStatus::_FormFingerprintBackward() } void -LinearFpStatus::_ExamineBit(const uint64_t b) -{ +LinearFpStatus::_ExamineBit(const uint64_t b) { #ifdef DEBUG_LINEAR_FP cerr << "Formed bit " << b < like the -f option to grep, read the patterns (smiles) from - -v verbose output + -f like the -f option to grep, read the patterns (smiles) from . + -c remove chirality before comparing. + -l reduce to largest fragment. + -v verbose output. 
)"; ::exit(rc); diff --git a/src/Molecule_Tools/linear_fingerprint_main.cc b/src/Molecule_Tools/linear_fingerprint_main.cc index d4015a5d..843d45a4 100644 --- a/src/Molecule_Tools/linear_fingerprint_main.cc +++ b/src/Molecule_Tools/linear_fingerprint_main.cc @@ -67,32 +67,33 @@ usage(int rc) #endif // clang-format on // clang-format off - cerr << "Computes linear path fingerprints\n"; - cerr << " -r minimum path length (def 0)\n"; - cerr << " -R maximum path length (def 7)\n"; - cerr << " -P ... atom type specification\n"; - cerr << " -J tag for fingerprints\n"; - cerr << " -f function as a TDT filter\n"; - cerr << " -X look for bits in and provide explanations\n"; - cerr << " -B write all bits found to \n"; - cerr << " -y check for bit collisions\n"; - cerr << " -s gather statistics on molecules processed\n"; - cerr << " -c produce isotopically labelled smiles with coverage\n"; - cerr << " -x allow linear paths can cross\n"; - cerr << " -l reduce to largest fragment\n"; - cerr << " -i input specification\n"; - cerr << " -g ... chemical standardisation options\n"; - cerr << " -E ... standard element specifications\n"; - cerr << " -A ... standard aromaticity specifications\n"; - cerr << " -v verbose output\n"; + cerr << R"(Computes linear path fingerprints + -r minimum path length (def 0) + -R maximum path length (def 7) + -P ... atom type specification + -J tag for fingerprints + -f function as a TDT filter + -X look for bits in and provide explanations + -B write all bits found to + -y check for bit collisions + -s gather statistics on molecules processed + -c produce isotopically labelled smiles with coverage + -x allow linear paths can cross + -w set ring bits when a path forms a ring + -l reduce to largest fragment + -i input specification + -g ... chemical standardisation options + -E ... standard element specifications + -A ... 
standard aromaticity specifications + -v verbose output +)"; // clang-format on exit(rc); } void -Preprocess(Molecule & m) -{ +Preprocess(Molecule & m) { if (reduce_to_largest_fragment) m.reduce_to_largest_fragment(); @@ -268,7 +269,7 @@ LinearFingerprint(const char * fname, FileType input_type, int LinearFingerprint(int argc, char ** argv) { - Command_Line cl(argc, argv, "E:A:K:lg:i:J:P:vfr:R:ysB:cx"); + Command_Line cl(argc, argv, "E:A:K:lg:i:J:P:vfr:R:ysB:cxw"); if (cl.unrecognised_options_encountered()) usage(1); @@ -397,6 +398,13 @@ LinearFingerprint(int argc, char ** argv) cerr << "Paths can cross\n"; } + if (cl.option_present('w')) { + linear_fp_gen.set_fingerprint_ring_presence(true); + if (verbose) { + cerr << "Will set bits for presence of rings\n"; + } + } + if (cl.option_present('B')) { const char * fname = cl.option_value('B'); if (!linear_fp_gen.OpenStreamForBitMeanings(fname)) { diff --git a/src/Utilities/General/iwcut.cc b/src/Utilities/General/iwcut.cc index d8e3b713..dc17cd6d 100644 --- a/src/Utilities/General/iwcut.cc +++ b/src/Utilities/General/iwcut.cc @@ -6,19 +6,19 @@ + __GNUC_MINOR__ * 100 \ + __GNUC_PATCHLEVEL__) +#include +#include #include #include -#include #include -#include -using std::cerr; -using std::endl; #include "Foundational/cmdline/cmdline.h" #include "Foundational/data_source/iwstring_data_source.h" #include "Foundational/iwmisc/misc.h" #include "Foundational/iwmisc/iwre2.h" +using std::cerr; + static void usage(int rc) { @@ -710,7 +710,7 @@ iwcut(const const_IWSubstring & buffer, { cerr << ' ' << word_beginnings[i]; } - cerr << endl; + cerr << '\n'; #endif for (int i = 0; i < nr; i++) @@ -797,7 +797,7 @@ locate_quoted_tokens_word_beginnings(const const_IWSubstring & buffer, for (int i = 1; i < n; ++i) { -// cerr << " char " << i << " '" << buffer[i] << "' quote " << in_quote << endl; +// cerr << " char " << i << " '" << buffer[i] << "' quote " << in_quote << '\n'; if (dquote == buffer[i]) in_quote = ! 
in_quote; else if (in_quote) @@ -809,7 +809,7 @@ locate_quoted_tokens_word_beginnings(const const_IWSubstring & buffer, #ifdef DEBUG_QUOTED_WB for (int i = 0; i < word_beginnings.size(); ++i) { - cerr << " wb " << i << ' ' << word_beginnings[i] << endl; + cerr << " wb " << i << ' ' << word_beginnings[i] << '\n'; } #endif @@ -828,7 +828,7 @@ iwcut(const const_IWSubstring & buffer, if (columns_in_input > 0) word_beginnings.resize(columns_in_input); -//cerr << "Line " << __LINE__ << " iqt " << input_is_quoted_tokens << endl; +//cerr << "Line " << __LINE__ << " iqt " << input_is_quoted_tokens << '\n'; int ncol; if (input_is_quoted_tokens) @@ -840,7 +840,7 @@ iwcut(const const_IWSubstring & buffer, #ifdef DEBUG_IWCUT cerr << "Processing '" << buffer << "'\n"; - cerr << "ncol " << ncol << " count " << buffer.ccount(input_token_separator) << endl; + cerr << "ncol " << ncol << " count " << buffer.ccount(input_token_separator) << '\n'; #endif if (ncol > columns_in_input) @@ -944,7 +944,7 @@ find_column_number(const IWString & descriptor, columns_requested.add_if_not_already_present(i); if (verbose > 1) - cerr << "Descriptor '" << d << " in column " << (i + 1) << endl; + cerr << "Descriptor '" << d << " in column " << (i + 1) << '\n'; return 1; } @@ -968,8 +968,9 @@ identify_column (const IWString & descriptor, resizable_array & columns_requested) { - if (! match_descriptor_names_as_regular_expressions) + if (! 
match_descriptor_names_as_regular_expressions) { return find_column_number(descriptor, header, columns_requested); + } re2::StringPiece tmp(descriptor.data(), descriptor.length()); RE2 rx(tmp); @@ -995,7 +996,7 @@ identify_column (const IWString & descriptor, rc++; if (verbose > 1) - cerr << "Descriptor '" << d << " in column " << (i + 1) << endl; + cerr << "Descriptor '" << d << " in column " << (i + 1) << '\n'; } } @@ -1014,7 +1015,7 @@ do_split(const const_IWSubstring & buffer, const char dquote = '"'; -//cerr << "Looking for wb in '" << buffer << endl; +//cerr << "Looking for wb in '" << buffer << '\n'; int previous_delimiter = -1; // @@ -1087,7 +1088,7 @@ determine_descriptors_to_be_output(const const_IWSubstring & buffer, cerr << "header split into " << header.size() << " items\n"; for (int i = 0; i < header.number_elements(); ++i) { - cerr << " col " << i << " dname " << *header[i] << endl; + cerr << " col " << i << " dname " << *header[i] << '\n'; } #endif @@ -1127,7 +1128,7 @@ determine_descriptors_to_be_output(const const_IWSubstring & buffer, continue; } - cerr << buffer << endl; + cerr << buffer << '\n'; rc = 0; } @@ -1188,7 +1189,7 @@ iwcut(iwstring_data_source & input, if (! 
iwcut(buffer, columns_requested, output)) { - cerr << "Fatal error on line " << input.lines_read() << endl; + cerr << "Fatal error on line " << input.lines_read() << '\n'; return 0; } @@ -1380,7 +1381,7 @@ iwcut (int argc, char ** argv) // { // cerr << ' ' << (columns_requested[i] + 1); // } -// cerr << endl; +// cerr << '\n'; // } } @@ -1523,7 +1524,7 @@ iwcut (int argc, char ** argv) cerr << "Will extract these descriptors\n"; for (int i = 0; i < descriptors_requested.number_elements(); i++) { - cerr << ' ' << *(descriptors_requested[i]) << endl; + cerr << ' ' << *(descriptors_requested[i]) << '\n'; } } @@ -1532,7 +1533,7 @@ iwcut (int argc, char ** argv) cerr << "Will extract these columns\n"; for (int i = 0; i < columns_requested.number_elements(); i++) { - cerr << ' ' << (columns_requested[i] + 1) << endl; + cerr << ' ' << (columns_requested[i] + 1) << '\n'; } }