Skip to content

Commit 6fdf204

Browse files
Bump antlr to 4.11 (#63)
**Motivation**: Version 4.11.1 is the latest release of `antlr`. I don't see a clear reason not to use that version (other than that the package now needs `c++17` instead of `c++11`). Besides, updating antlr could make it possible to use [other grammar definitions](https://github.com/antlr/grammars-v4/blob/master/sql/tsql/TSqlParser.g4). **Changes**: See the commit history: 1. bump dependencies, 2. update `cpp_src/`, 3. run `helper_generate_parsers.sh`, 4. increase the `c++` version. --------- Co-authored-by: Uwe L. Korn <[email protected]>
1 parent bf7421a commit 6fdf204

File tree

246 files changed

+14619
-22509
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

246 files changed

+14619
-22509
lines changed

docs/source/development.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,8 @@ in C++ for better performance. The parsed tree is then converted into the Python
6868

6969
If you want to adapt the grammar please have a look at ``pytsql/src/pytsql/grammar/tsql.g4``.
7070
All files in ``pytsql/src/pytsql/grammar/cpp_src/antlr4-cpp-runtime`` are taken directly from
71-
the `ANTLR repository release 4.9.3
72-
<https://github.com/antlr/antlr4/tree/4.9.3/runtime/Cpp/runtime>`_
71+
the `ANTLR repository release 4.11.1
72+
<https://github.com/antlr/antlr4/tree/4.11.1/runtime/Cpp/runtime>`_
7373
and the rest of the files in ``pytsql/src/pytsql/grammar`` are generated by ``antlr`` or
7474
``speedy-antlr-tool``.
7575

environment.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,5 @@ dependencies:
1515
- sphinxcontrib-apidoc
1616
- sqlalchemy
1717
- pyodbc
18-
- speedy-antlr-tool==1.3.1
19-
- antlr4-python3-runtime==4.9.3
18+
- speedy-antlr-tool==1.4.1
19+
- antlr4-python3-runtime==4.11.1

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ requires-python = ">=3.7.0"
3737

3838
dependencies = [
3939
"sqlalchemy >=1.4",
40-
"antlr4-python3-runtime ==4.9.3",
40+
"antlr4-python3-runtime ==4.11",
4141
"pyodbc >=4.0.30"
4242
]
4343

setup.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,10 @@ def create_extension() -> setuptools.Extension:
4040
"""Create the ANTLR C++ extension to be passed to `setuptools.setup`"""
4141

4242
extra_compile_args = {
43-
"windows": ["/DANTLR4CPP_STATIC", "/Zc:__cplusplus"],
44-
"linux": ["-std=c++11"],
45-
"darwin": ["-std=c++11"],
46-
"cygwin": ["-std=c++11"],
43+
"windows": ["/DANTLR4CPP_STATIC", "/Zc:__cplusplus", "/std:c++17"],
44+
"linux": ["-std=c++17"],
45+
"darwin": ["-std=c++17", "-D_LIBCPP_DISABLE_AVAILABILITY"],
46+
"cygwin": ["-std=c++17"],
4747
}
4848

4949
sources = glob.glob("src/pytsql/grammar/cpp_src/**/*.cpp", recursive=True)

src/pytsql/grammar/cpp_src/antlr4-cpp-runtime/ANTLRFileStream.cpp

-6
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
* can be found in the LICENSE.txt file in the project root.
44
*/
55

6-
#include "support/StringUtils.h"
7-
86
#include "ANTLRFileStream.h"
97

108
using namespace antlr4;
@@ -15,11 +13,7 @@ void ANTLRFileStream::loadFromFile(const std::string &fileName) {
1513
return;
1614
}
1715

18-
#ifdef _MSC_VER
19-
std::ifstream stream(antlrcpp::s2ws(fileName), std::ios::binary);
20-
#else
2116
std::ifstream stream(fileName, std::ios::binary);
22-
#endif
2317

2418
ANTLRInputStream::load(stream);
2519
}

src/pytsql/grammar/cpp_src/antlr4-cpp-runtime/ANTLRInputStream.cpp

+30-19
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#include "misc/Interval.h"
1010
#include "IntStream.h"
1111

12-
#include "support/StringUtils.h"
12+
#include "support/Utf8.h"
1313
#include "support/CPPUtils.h"
1414

1515
#include "ANTLRInputStream.h"
@@ -23,15 +23,9 @@ ANTLRInputStream::ANTLRInputStream() {
2323
InitializeInstanceFields();
2424
}
2525

26-
#if __cplusplus >= 201703L
27-
ANTLRInputStream::ANTLRInputStream(const std::string_view &input): ANTLRInputStream() {
26+
ANTLRInputStream::ANTLRInputStream(std::string_view input): ANTLRInputStream() {
2827
load(input.data(), input.length());
2928
}
30-
#endif
31-
32-
ANTLRInputStream::ANTLRInputStream(const std::string &input): ANTLRInputStream() {
33-
load(input.data(), input.size());
34-
}
3529

3630
ANTLRInputStream::ANTLRInputStream(const char *data, size_t length) {
3731
load(data, length);
@@ -41,28 +35,37 @@ ANTLRInputStream::ANTLRInputStream(std::istream &stream): ANTLRInputStream() {
4135
load(stream);
4236
}
4337

44-
void ANTLRInputStream::load(const std::string &input) {
45-
load(input.data(), input.size());
38+
void ANTLRInputStream::load(const std::string &input, bool lenient) {
39+
load(input.data(), input.size(), lenient);
4640
}
4741

48-
void ANTLRInputStream::load(const char *data, size_t length) {
42+
void ANTLRInputStream::load(const char *data, size_t length, bool lenient) {
4943
// Remove the UTF-8 BOM if present.
5044
const char *bom = "\xef\xbb\xbf";
51-
if (length >= 3 && strncmp(data, bom, 3) == 0)
52-
_data = antlrcpp::utf8_to_utf32(data + 3, data + length);
53-
else
54-
_data = antlrcpp::utf8_to_utf32(data, data + length);
45+
if (length >= 3 && strncmp(data, bom, 3) == 0) {
46+
data += 3;
47+
length -= 3;
48+
}
49+
if (lenient) {
50+
_data = Utf8::lenientDecode(std::string_view(data, length));
51+
} else {
52+
auto maybe_utf32 = Utf8::strictDecode(std::string_view(data, length));
53+
if (!maybe_utf32.has_value()) {
54+
throw IllegalArgumentException("UTF-8 string contains an illegal byte sequence");
55+
}
56+
_data = std::move(maybe_utf32).value();
57+
}
5558
p = 0;
5659
}
5760

58-
void ANTLRInputStream::load(std::istream &stream) {
61+
void ANTLRInputStream::load(std::istream &stream, bool lenient) {
5962
if (!stream.good() || stream.eof()) // No fail, bad or EOF.
6063
return;
6164

6265
_data.clear();
6366

6467
std::string s((std::istreambuf_iterator<char>(stream)), std::istreambuf_iterator<char>());
65-
load(s.data(), s.length());
68+
load(s.data(), s.length(), lenient);
6669
}
6770

6871
void ANTLRInputStream::reset() {
@@ -150,7 +153,11 @@ std::string ANTLRInputStream::getText(const Interval &interval) {
150153
return "";
151154
}
152155

153-
return antlrcpp::utf32_to_utf8(_data.substr(start, count));
156+
auto maybeUtf8 = Utf8::strictEncode(std::u32string_view(_data).substr(start, count));
157+
if (!maybeUtf8.has_value()) {
158+
throw IllegalArgumentException("Input stream contains invalid Unicode code points");
159+
}
160+
return std::move(maybeUtf8).value();
154161
}
155162

156163
std::string ANTLRInputStream::getSourceName() const {
@@ -161,7 +168,11 @@ std::string ANTLRInputStream::getSourceName() const {
161168
}
162169

163170
std::string ANTLRInputStream::toString() const {
164-
return antlrcpp::utf32_to_utf8(_data);
171+
auto maybeUtf8 = Utf8::strictEncode(_data);
172+
if (!maybeUtf8.has_value()) {
173+
throw IllegalArgumentException("Input stream contains invalid Unicode code points");
174+
}
175+
return std::move(maybeUtf8).value();
165176
}
166177

167178
void ANTLRInputStream::InitializeInstanceFields() {

src/pytsql/grammar/cpp_src/antlr4-cpp-runtime/ANTLRInputStream.h

+13-10
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
#pragma once
77

8+
#include <string_view>
9+
810
#include "CharStream.h"
911

1012
namespace antlr4 {
@@ -16,7 +18,7 @@ namespace antlr4 {
1618
protected:
1719
/// The data being scanned.
1820
// UTF-32
19-
UTF32String _data;
21+
std::u32string _data;
2022

2123
/// 0..n-1 index into string of next char </summary>
2224
size_t p;
@@ -26,18 +28,19 @@ namespace antlr4 {
2628
std::string name;
2729

2830
ANTLRInputStream();
29-
30-
#if __cplusplus >= 201703L
31-
ANTLRInputStream(const std::string_view &input);
32-
#endif
33-
34-
ANTLRInputStream(const std::string &input);
31+
32+
ANTLRInputStream(std::string_view input);
33+
3534
ANTLRInputStream(const char *data, size_t length);
3635
ANTLRInputStream(std::istream &stream);
3736

38-
virtual void load(const std::string &input);
39-
virtual void load(const char *data, size_t length);
40-
virtual void load(std::istream &stream);
37+
virtual void load(const std::string &input, bool lenient);
38+
virtual void load(const char *data, size_t length, bool lenient);
39+
virtual void load(std::istream &stream, bool lenient);
40+
41+
virtual void load(const std::string &input) { load(input, false); }
42+
virtual void load(const char *data, size_t length) { load(data, length, false); }
43+
virtual void load(std::istream &stream) { load(stream, false); }
4144

4245
/// Reset the stream so that it's in the same state it was
4346
/// when the object was created *except* the data array is not

src/pytsql/grammar/cpp_src/antlr4-cpp-runtime/CommonToken.cpp

+2-4
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010

1111
#include "misc/Interval.h"
1212

13-
#include "support/StringUtils.h"
1413
#include "support/CPPUtils.h"
14+
#include "support/StringUtils.h"
1515

1616
#include "CommonToken.h"
1717

@@ -165,9 +165,7 @@ std::string CommonToken::toString(Recognizer *r) const {
165165
}
166166
std::string txt = getText();
167167
if (!txt.empty()) {
168-
antlrcpp::replaceAll(txt, "\n", "\\n");
169-
antlrcpp::replaceAll(txt, "\r", "\\r");
170-
antlrcpp::replaceAll(txt, "\t", "\\t");
168+
txt = antlrcpp::escapeWhitespace(txt);
171169
} else {
172170
txt = "<no text>";
173171
}

src/pytsql/grammar/cpp_src/antlr4-cpp-runtime/DefaultErrorStrategy.cpp

+16-13
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,11 @@
1212
#include "atn/RuleTransition.h"
1313
#include "atn/ATN.h"
1414
#include "atn/ATNState.h"
15+
#include "support/StringUtils.h"
16+
#include "support/Casts.h"
1517
#include "Parser.h"
1618
#include "CommonToken.h"
1719
#include "Vocabulary.h"
18-
#include "support/StringUtils.h"
1920

2021
#include "DefaultErrorStrategy.h"
2122

@@ -106,19 +107,19 @@ void DefaultErrorStrategy::sync(Parser *recognizer) {
106107
}
107108

108109
switch (s->getStateType()) {
109-
case atn::ATNState::BLOCK_START:
110-
case atn::ATNState::STAR_BLOCK_START:
111-
case atn::ATNState::PLUS_BLOCK_START:
112-
case atn::ATNState::STAR_LOOP_ENTRY:
110+
case atn::ATNStateType::BLOCK_START:
111+
case atn::ATNStateType::STAR_BLOCK_START:
112+
case atn::ATNStateType::PLUS_BLOCK_START:
113+
case atn::ATNStateType::STAR_LOOP_ENTRY:
113114
// report error and recover if possible
114115
if (singleTokenDeletion(recognizer) != nullptr) {
115116
return;
116117
}
117118

118119
throw InputMismatchException(recognizer);
119120

120-
case atn::ATNState::PLUS_LOOP_BACK:
121-
case atn::ATNState::STAR_LOOP_BACK: {
121+
case atn::ATNStateType::PLUS_LOOP_BACK:
122+
case atn::ATNStateType::STAR_LOOP_BACK: {
122123
reportUnwantedToken(recognizer);
123124
misc::IntervalSet expecting = recognizer->getExpectedTokens();
124125
misc::IntervalSet whatFollowsLoopIterationOrRule = expecting.Or(getErrorRecoverySet(recognizer));
@@ -292,11 +293,13 @@ size_t DefaultErrorStrategy::getSymbolType(Token *symbol) {
292293
}
293294

294295
std::string DefaultErrorStrategy::escapeWSAndQuote(const std::string &s) const {
295-
std::string result = s;
296-
antlrcpp::replaceAll(result, "\n", "\\n");
297-
antlrcpp::replaceAll(result, "\r","\\r");
298-
antlrcpp::replaceAll(result, "\t","\\t");
299-
return "'" + result + "'";
296+
std::string result;
297+
result.reserve(s.size() + 2);
298+
result.push_back('\'');
299+
antlrcpp::escapeWhitespace(result, s);
300+
result.push_back('\'');
301+
result.shrink_to_fit();
302+
return result;
300303
}
301304

302305
misc::IntervalSet DefaultErrorStrategy::getErrorRecoverySet(Parser *recognizer) {
@@ -306,7 +309,7 @@ misc::IntervalSet DefaultErrorStrategy::getErrorRecoverySet(Parser *recognizer)
306309
while (ctx->invokingState != ATNState::INVALID_STATE_NUMBER) {
307310
// compute what follows who invoked us
308311
atn::ATNState *invokingState = atn.states[ctx->invokingState];
309-
atn::RuleTransition *rt = dynamic_cast<atn::RuleTransition*>(invokingState->transitions[0]);
312+
const atn::RuleTransition *rt = downCast<const atn::RuleTransition*>(invokingState->transitions[0].get());
310313
misc::IntervalSet follow = atn.nextTokens(rt->followState);
311314
recoverSet.addAll(follow);
312315

src/pytsql/grammar/cpp_src/antlr4-cpp-runtime/Exceptions.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ using namespace antlr4;
1010
RuntimeException::RuntimeException(const std::string &msg) : std::exception(), _message(msg) {
1111
}
1212

13-
const char* RuntimeException::what() const NOEXCEPT {
13+
const char* RuntimeException::what() const noexcept {
1414
return _message.c_str();
1515
}
1616

@@ -19,7 +19,7 @@ const char* RuntimeException::what() const NOEXCEPT {
1919
IOException::IOException(const std::string &msg) : std::exception(), _message(msg) {
2020
}
2121

22-
const char* IOException::what() const NOEXCEPT {
22+
const char* IOException::what() const noexcept {
2323
return _message.c_str();
2424
}
2525

src/pytsql/grammar/cpp_src/antlr4-cpp-runtime/Exceptions.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ namespace antlr4 {
1616
public:
1717
RuntimeException(const std::string &msg = "");
1818

19-
virtual const char* what() const NOEXCEPT override;
19+
virtual const char* what() const noexcept override;
2020
};
2121

2222
class ANTLR4CPP_PUBLIC IllegalStateException : public RuntimeException {
@@ -77,7 +77,7 @@ namespace antlr4 {
7777
public:
7878
IOException(const std::string &msg = "");
7979

80-
virtual const char* what() const NOEXCEPT override;
80+
virtual const char* what() const noexcept override;
8181
};
8282

8383
class ANTLR4CPP_PUBLIC CancellationException : public IllegalStateException {

src/pytsql/grammar/cpp_src/antlr4-cpp-runtime/FailedPredicateException.cpp

+5-4
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "atn/PredicateTransition.h"
99
#include "atn/ATN.h"
1010
#include "atn/ATNState.h"
11+
#include "support/Casts.h"
1112
#include "support/CPPUtils.h"
1213

1314
#include "FailedPredicateException.h"
@@ -26,10 +27,10 @@ FailedPredicateException::FailedPredicateException(Parser *recognizer, const std
2627
recognizer->getInputStream(), recognizer->getContext(), recognizer->getCurrentToken()) {
2728

2829
atn::ATNState *s = recognizer->getInterpreter<atn::ATNSimulator>()->atn.states[recognizer->getState()];
29-
atn::Transition *transition = s->transitions[0];
30-
if (is<atn::PredicateTransition*>(transition)) {
31-
_ruleIndex = static_cast<atn::PredicateTransition *>(transition)->ruleIndex;
32-
_predicateIndex = static_cast<atn::PredicateTransition *>(transition)->predIndex;
30+
const atn::Transition *transition = s->transitions[0].get();
31+
if (transition->getTransitionType() == atn::TransitionType::PREDICATE) {
32+
_ruleIndex = downCast<const atn::PredicateTransition&>(*transition).getRuleIndex();
33+
_predicateIndex = downCast<const atn::PredicateTransition&>(*transition).getPredIndex();
3334
} else {
3435
_ruleIndex = 0;
3536
_predicateIndex = 0;

src/pytsql/grammar/cpp_src/antlr4-cpp-runtime/FailedPredicateException.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ namespace antlr4 {
1515
/// prediction.
1616
class ANTLR4CPP_PUBLIC FailedPredicateException : public RecognitionException {
1717
public:
18-
FailedPredicateException(Parser *recognizer);
18+
explicit FailedPredicateException(Parser *recognizer);
1919
FailedPredicateException(Parser *recognizer, const std::string &predicate);
2020
FailedPredicateException(Parser *recognizer, const std::string &predicate, const std::string &message);
2121

0 commit comments

Comments
 (0)