Skip to content

Commit 82a8932

Browse files
authored
Extend ScamBlocker to detect image-only scam messages, plus unit tests (#1279)
Added two new config params: `suspiciousAttachmentsThreshold` and `suspiciousAttachmentNamePattern`.
1 parent 1592157 commit 82a8932

File tree

5 files changed

+168
-4
lines changed

5 files changed

+168
-4
lines changed

application/config.json.template

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,9 @@
7777
"crypto",
7878
"tele"
7979
],
80-
"isHostSimilarToKeywordDistanceThreshold": 2
80+
"isHostSimilarToKeywordDistanceThreshold": 2,
81+
"suspiciousAttachmentsThreshold": 3,
82+
"suspiciousAttachmentNamePattern": "(image|\\d{1,2})\\.[^.]{0,5}"
8183
},
8284
"wolframAlphaAppId": "79J52T-6239TVXHR7",
8385
"helpSystem": {

application/src/main/java/org/togetherjava/tjbot/config/ScamBlockerConfig.java

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ public final class ScamBlockerConfig {
2323
private final Set<String> hostBlacklist;
2424
private final Set<String> suspiciousHostKeywords;
2525
private final int isHostSimilarToKeywordDistanceThreshold;
26+
private final int suspiciousAttachmentsThreshold;
27+
private final String suspiciousAttachmentNamePattern;
2628

2729
@JsonCreator(mode = JsonCreator.Mode.PROPERTIES)
2830
private ScamBlockerConfig(@JsonProperty(value = "mode", required = true) Mode mode,
@@ -37,7 +39,11 @@ private ScamBlockerConfig(@JsonProperty(value = "mode", required = true) Mode mo
3739
@JsonProperty(value = "suspiciousHostKeywords",
3840
required = true) Set<String> suspiciousHostKeywords,
3941
@JsonProperty(value = "isHostSimilarToKeywordDistanceThreshold",
40-
required = true) int isHostSimilarToKeywordDistanceThreshold) {
42+
required = true) int isHostSimilarToKeywordDistanceThreshold,
43+
@JsonProperty(value = "suspiciousAttachmentsThreshold",
44+
required = true) int suspiciousAttachmentsThreshold,
45+
@JsonProperty(value = "suspiciousAttachmentNamePattern",
46+
required = true) String suspiciousAttachmentNamePattern) {
4147
this.mode = Objects.requireNonNull(mode);
4248
this.reportChannelPattern = Objects.requireNonNull(reportChannelPattern);
4349
this.botTrapChannelPattern = Objects.requireNonNull(botTrapChannelPattern);
@@ -46,6 +52,9 @@ private ScamBlockerConfig(@JsonProperty(value = "mode", required = true) Mode mo
4652
this.hostBlacklist = new HashSet<>(Objects.requireNonNull(hostBlacklist));
4753
this.suspiciousHostKeywords = new HashSet<>(Objects.requireNonNull(suspiciousHostKeywords));
4854
this.isHostSimilarToKeywordDistanceThreshold = isHostSimilarToKeywordDistanceThreshold;
55+
this.suspiciousAttachmentsThreshold = suspiciousAttachmentsThreshold;
56+
this.suspiciousAttachmentNamePattern =
57+
Objects.requireNonNull(suspiciousAttachmentNamePattern);
4958
}
5059

5160
/**
@@ -125,6 +134,26 @@ public int getIsHostSimilarToKeywordDistanceThreshold() {
125134
return isHostSimilarToKeywordDistanceThreshold;
126135
}
127136

137+
/**
138+
* Gets the minimum number of suspicious attachments that are required in a message to flag it
139+
* as suspicious for its contained attachments.
140+
*
141+
* @return the minimum number of suspicious attachments
142+
*/
143+
public int getSuspiciousAttachmentsThreshold() {
144+
return suspiciousAttachmentsThreshold;
145+
}
146+
147+
/**
148+
* Gets the REGEX pattern used to identify an attachment file name that is considered
149+
* suspicious. The file name includes the extension.
150+
*
151+
* @return the attachment file name pattern
152+
*/
153+
public String getSuspiciousAttachmentNamePattern() {
154+
return suspiciousAttachmentNamePattern;
155+
}
156+
128157
/**
129158
* Mode of a scam blocker. Controls which actions it takes when detecting scam.
130159
*/

application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamBlocker.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,8 +137,7 @@ public void onMessageReceived(MessageReceivedEvent event) {
137137
}
138138

139139
Message message = event.getMessage();
140-
String content = message.getContentDisplay();
141-
if (isSafe && scamDetector.isScam(content)) {
140+
if (isSafe && scamDetector.isScam(message)) {
142141
isSafe = false;
143142
}
144143

application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetector.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
package org.togetherjava.tjbot.features.moderation.scam;
22

3+
import net.dv8tion.jda.api.entities.Message;
4+
35
import org.togetherjava.tjbot.config.Config;
46
import org.togetherjava.tjbot.config.ScamBlockerConfig;
57
import org.togetherjava.tjbot.features.utils.StringDistances;
68

79
import java.net.URI;
10+
import java.util.Collection;
11+
import java.util.List;
812
import java.util.Locale;
13+
import java.util.function.Predicate;
914
import java.util.regex.Pattern;
1015
import java.util.stream.Stream;
1116

@@ -18,6 +23,7 @@
1823
public final class ScamDetector {
1924
private static final Pattern TOKENIZER = Pattern.compile("[\\s,]");
2025
private final ScamBlockerConfig config;
26+
private final Predicate<String> isSuspiciousAttachmentName;
2127

2228
/**
2329
* Creates a new instance with the given configuration
@@ -26,6 +32,26 @@ public final class ScamDetector {
2632
*/
2733
public ScamDetector(Config config) {
2834
this.config = config.getScamBlocker();
35+
isSuspiciousAttachmentName =
36+
Pattern.compile(config.getScamBlocker().getSuspiciousAttachmentNamePattern())
37+
.asMatchPredicate();
38+
}
39+
40+
/**
41+
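* Detects whether the given message classifies as scam or not, using certain heuristics. Messages with blank text content are judged by their attachments; all other messages are judged by their text content only.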
42+
*
43+
* @param message the message to analyze
44+
* @return Whether the message classifies as scam
45+
*/
46+
public boolean isScam(Message message) {
47+
String content = message.getContentDisplay();
48+
List<Message.Attachment> attachments = message.getAttachments();
49+
50+
if (content.isBlank()) {
51+
return areAttachmentsSuspicious(attachments);
52+
}
53+
54+
return isScam(content);
2955
}
3056

3157
/**
@@ -123,6 +149,16 @@ private boolean containsSuspiciousKeyword(String token) {
123149
});
124150
}
125151

152+
private boolean areAttachmentsSuspicious(Collection<? extends Message.Attachment> attachments) {
153+
long suspiciousAttachments =
154+
attachments.stream().filter(this::isAttachmentSuspicious).count();
155+
return suspiciousAttachments >= config.getSuspiciousAttachmentsThreshold();
156+
}
157+
158+
private boolean isAttachmentSuspicious(Message.Attachment attachment) {
159+
return attachment.isImage() && isSuspiciousAttachmentName.test(attachment.getFileName());
160+
}
161+
126162
private boolean isHostSimilarToKeyword(String host, String keyword) {
127163
// NOTE This algorithm is far from optimal.
128164
// It is good enough for our purpose though and not that complex.

application/src/test/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetectorTest.java

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package org.togetherjava.tjbot.features.moderation.scam;
22

3+
import net.dv8tion.jda.api.entities.Message;
34
import org.junit.jupiter.api.BeforeEach;
45
import org.junit.jupiter.api.DisplayName;
56
import org.junit.jupiter.api.Test;
@@ -9,6 +10,8 @@
910
import org.togetherjava.tjbot.config.Config;
1011
import org.togetherjava.tjbot.config.ScamBlockerConfig;
1112

13+
import java.util.ArrayList;
14+
import java.util.Collections;
1215
import java.util.List;
1316
import java.util.Set;
1417

@@ -18,6 +21,9 @@
1821
import static org.mockito.Mockito.when;
1922

2023
final class ScamDetectorTest {
24+
private static final int SUSPICIOUS_ATTACHMENTS_THRESHOLD = 3;
25+
private static final String SUSPICIOUS_ATTACHMENT_NAME = "scam.png";
26+
2127
private ScamDetector scamDetector;
2228

2329
@BeforeEach
@@ -38,6 +44,10 @@ void setUp() {
3844
when(scamConfig.getSuspiciousHostKeywords())
3945
.thenReturn(Set.of("discord", "nitro", "premium", "free", "cheat", "crypto", "tele"));
4046
when(scamConfig.getIsHostSimilarToKeywordDistanceThreshold()).thenReturn(2);
47+
when(scamConfig.getSuspiciousAttachmentsThreshold())
48+
.thenReturn(SUSPICIOUS_ATTACHMENTS_THRESHOLD);
49+
when(scamConfig.getSuspiciousAttachmentNamePattern())
50+
.thenReturn(SUSPICIOUS_ATTACHMENT_NAME);
4151

4252
scamDetector = new ScamDetector(config);
4353
}
@@ -121,6 +131,94 @@ void websitesWithTooManyDifferencesAreNotSuspicious() {
121131
assertFalse(isScamResult);
122132
}
123133

134+
@Test
135+
@DisplayName("Messages containing multiple suspicious attachments are flagged as scam")
136+
void detectsSuspiciousAttachments() {
137+
// GIVEN an empty message containing suspicious attachments
138+
String content = "";
139+
Message.Attachment attachment = createImageAttachmentMock(SUSPICIOUS_ATTACHMENT_NAME);
140+
List<Message.Attachment> attachments =
141+
Collections.nCopies(SUSPICIOUS_ATTACHMENTS_THRESHOLD, attachment);
142+
Message message = createMessageMock(content, attachments);
143+
144+
// WHEN analyzing it
145+
boolean isScamResult = scamDetector.isScam(message);
146+
147+
// THEN flags it as scam
148+
assertTrue(isScamResult);
149+
}
150+
151+
@Test
152+
@DisplayName("Messages containing text content are not flagged for suspicious attachments")
153+
void ignoresAttachmentsIfContentProvided() {
154+
// GIVEN a non-empty message containing suspicious attachments
155+
String content = "Hello World";
156+
Message.Attachment attachment = createImageAttachmentMock(SUSPICIOUS_ATTACHMENT_NAME);
157+
List<Message.Attachment> attachments =
158+
Collections.nCopies(SUSPICIOUS_ATTACHMENTS_THRESHOLD, attachment);
159+
Message message = createMessageMock(content, attachments);
160+
161+
// WHEN analyzing it
162+
boolean isScamResult = scamDetector.isScam(message);
163+
164+
// THEN flags it as harmless
165+
assertFalse(isScamResult);
166+
}
167+
168+
@Test
169+
@DisplayName("Messages containing not enough suspicious attachments are not flagged")
170+
void ignoresIfNotEnoughSuspiciousAttachments() {
171+
// GIVEN an empty message containing some, but not enough suspicious attachments
172+
String content = "";
173+
174+
Message.Attachment badAttachment = createImageAttachmentMock(SUSPICIOUS_ATTACHMENT_NAME);
175+
Message.Attachment goodAttachment = createImageAttachmentMock("good.png");
176+
int badAttachmentAmount = SUSPICIOUS_ATTACHMENTS_THRESHOLD - 1;
177+
List<Message.Attachment> attachments =
178+
new ArrayList<>(Collections.nCopies(badAttachmentAmount, badAttachment));
179+
attachments.add(goodAttachment);
180+
181+
Message message = createMessageMock(content, attachments);
182+
183+
// WHEN analyzing it
184+
boolean isScamResult = scamDetector.isScam(message);
185+
186+
// THEN flags it as harmless
187+
assertFalse(isScamResult);
188+
}
189+
190+
@Test
191+
@DisplayName("Messages containing harmless attachments are not flagged")
192+
void ignoresHarmlessAttachments() {
193+
// GIVEN an empty message containing only harmless attachments
194+
String content = "";
195+
Message.Attachment attachment = createImageAttachmentMock("good.png");
196+
List<Message.Attachment> attachments =
197+
Collections.nCopies(SUSPICIOUS_ATTACHMENTS_THRESHOLD, attachment);
198+
Message message = createMessageMock(content, attachments);
199+
200+
// WHEN analyzing it
201+
boolean isScamResult = scamDetector.isScam(message);
202+
203+
// THEN flags it as harmless
204+
assertFalse(isScamResult);
205+
}
206+
207+
private static Message createMessageMock(String content, List<Message.Attachment> attachments) {
208+
Message message = mock(Message.class);
209+
when(message.getContentRaw()).thenReturn(content);
210+
when(message.getContentDisplay()).thenReturn(content);
211+
when(message.getAttachments()).thenReturn(attachments);
212+
return message;
213+
}
214+
215+
private static Message.Attachment createImageAttachmentMock(String name) {
216+
Message.Attachment attachment = mock(Message.Attachment.class);
217+
when(attachment.isImage()).thenReturn(true);
218+
when(attachment.getFileName()).thenReturn(name);
219+
return attachment;
220+
}
221+
124222
private static List<String> provideRealScamMessages() {
125223
return List.of("""
126224
🤩bro steam gived nitro - https://nitro-ds.online/LfgUfMzqYyx12""",

0 commit comments

Comments
 (0)