From 8fcd557e74c55774ac7370320d1348ddb975e9f6 Mon Sep 17 00:00:00 2001 From: Alain O'Dea Date: Tue, 15 Jan 2019 17:46:19 -0330 Subject: [PATCH] fix Accepts invalid domains (#7) - Broaden GENERIC lexer regex to include non-ASCII for IDNs - Consider consecutive GENERIC tokens invalid in domain-part - Consider a leading hyphen in any part of a domain name an error --- src/main/java/emailvalidator4j/lexer/EmailLexer.java | 2 +- src/main/java/emailvalidator4j/parser/DomainPart.java | 9 +++++++++ .../parser/exception/ConsecutiveGeneric.java | 7 +++++++ .../java/emailvalidator4j/parser/DomainPartTest.java | 4 ++++ 4 files changed, 21 insertions(+), 1 deletion(-) mode change 100644 => 100755 src/main/java/emailvalidator4j/parser/DomainPart.java create mode 100644 src/main/java/emailvalidator4j/parser/exception/ConsecutiveGeneric.java diff --git a/src/main/java/emailvalidator4j/lexer/EmailLexer.java b/src/main/java/emailvalidator4j/lexer/EmailLexer.java index 7356eb9..b958a8f 100644 --- a/src/main/java/emailvalidator4j/lexer/EmailLexer.java +++ b/src/main/java/emailvalidator4j/lexer/EmailLexer.java @@ -16,7 +16,7 @@ public class EmailLexer { public void lex(String input) { Pattern pattern = Pattern.compile( - "([a-zA-Z_]+[46]?)|([0-9]+)|(\r\n)|(::)|(\\s+?)|(.)|(\\p{Cc}+)", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE + "(([a-zA-Z_]|[^\\u0000-\\u007F])+[46]?)|([0-9]+)|(\r\n)|(::)|(\\s+?)|(.)|(\\p{Cc}+)", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE ); Matcher matcher = pattern.matcher(input); diff --git a/src/main/java/emailvalidator4j/parser/DomainPart.java b/src/main/java/emailvalidator4j/parser/DomainPart.java old mode 100644 new mode 100755 index 1eae87a..ac7e3d0 --- a/src/main/java/emailvalidator4j/parser/DomainPart.java +++ b/src/main/java/emailvalidator4j/parser/DomainPart.java @@ -55,6 +55,10 @@ private void doParseDomainPart() throws InvalidEmail { int domainPartOpenedParenthesis = 0; boolean openBrackets = false; do { + if (this.lexer.getCurrent().equals(Tokens.HYPHEN)) { + throw new DomainHyphen("Found - in domain part"); + } + if (this.lexer.getCurrent().equals(Tokens.SEMICOLON)) { throw new ExpectedATEXT("Expected ATEXT"); } @@ -149,6 +153,11 @@ private void checkExceptions() throws InvalidEmail { if (this.lexer.getCurrent().equals(Tokens.BACKSLASH) && this.lexer.isNextToken(Tokens.get("GENERIC"))) { throw new ExpectedATEXT("Found BACKSLASH"); } + + if (this.lexer.getCurrent().equals(Tokens.get("GENERIC")) && this.lexer.isNextToken(Tokens.get("GENERIC"))) { + this.lexer.next(); + throw new ConsecutiveGeneric("Found " + this.lexer.getCurrent().getText()); + } } private void checkIPv6Tag(String literal) { diff --git a/src/main/java/emailvalidator4j/parser/exception/ConsecutiveGeneric.java b/src/main/java/emailvalidator4j/parser/exception/ConsecutiveGeneric.java new file mode 100644 index 0000000..b1d3a1d --- /dev/null +++ b/src/main/java/emailvalidator4j/parser/exception/ConsecutiveGeneric.java @@ -0,0 +1,7 @@ +package emailvalidator4j.parser.exception; + +public class ConsecutiveGeneric extends InvalidEmail { + public ConsecutiveGeneric(String message) { + super(message); + } +} diff --git a/src/test/java/emailvalidator4j/parser/DomainPartTest.java b/src/test/java/emailvalidator4j/parser/DomainPartTest.java index 88e594f..0ead3e0 100644 --- a/src/test/java/emailvalidator4j/parser/DomainPartTest.java +++ b/src/test/java/emailvalidator4j/parser/DomainPartTest.java @@ -47,10 +47,14 @@ public static Object[][] invalidDomainParts() { {ConsecutiveAT.class, "@@start"}, {ExpectedATEXT.class, "@at[start"}, {DomainHyphen.class, "@atstart-.com"}, + {DomainHyphen.class, "@bb.-cc"}, + {DomainHyphen.class, "@bb.-cc-"}, + {DomainHyphen.class, "@bb.cc-"}, {DomainNotAllowedCharacter.class, "@atst\\art.com"}, {DomainNotAllowedCharacter.class, "@example\\"}, {DomainNotAllowedCharacter.class, "@exa\\mple"}, {UnclosedDomainLiteral.class, "@example]"}, + {ConsecutiveGeneric.class, "@example'"}, }; }