Skip to content

Commit e6522be

Browse files
authored
Scam Blocker fine-tuning (#1281)
* added new scam and false positives, fine-tuned, ability to use "foo$"
* exact matches (^foo$)
1 parent 7c643c2 commit e6522be

File tree

3 files changed

+163
-20
lines changed

3 files changed

+163
-20
lines changed

application/config.json.template

Lines changed: 14 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -38,21 +38,27 @@
3838
"trading",
3939
"whatsapp",
4040
"crypto",
41-
"claim",
41+
"^claim",
4242
"teen",
4343
"adobe",
4444
"hack",
4545
"steamcommunity",
4646
"freenitro",
47-
"^earn",
48-
".exe"
47+
"^earn$",
48+
"^earning",
49+
".exe$"
4950
],
5051
"hostWhitelist": [
5152
"discord.com",
5253
"discord.media",
5354
"discordapp.com",
5455
"discordapp.net",
55-
"discordstatus.com"
56+
"discordstatus.com",
57+
"thehackernews.com",
58+
"gradle.org",
59+
"help.gradle.org",
60+
"youtube.com",
61+
"www.youtube.com"
5662
],
5763
"hostBlacklist": [
5864
"bit.ly",
@@ -66,7 +72,8 @@
6672
"telegra.ph",
6773
"shorturl.at",
6874
"cheatings.xyz",
69-
"transfer.sh"
75+
"transfer.sh",
76+
"tobimoller.space"
7077
],
7178
"suspiciousHostKeywords": [
7279
"discord",
@@ -75,7 +82,8 @@
7582
"free",
7683
"cheat",
7784
"crypto",
78-
"tele"
85+
"telegra",
86+
"telety"
7987
],
8088
"isHostSimilarToKeywordDistanceThreshold": 2,
8189
"suspiciousAttachmentsThreshold": 3,

application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetector.java

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
import java.util.Collection;
1111
import java.util.List;
1212
import java.util.Locale;
13+
import java.util.StringJoiner;
1314
import java.util.function.Predicate;
1415
import java.util.regex.Pattern;
1516
import java.util.stream.Stream;
@@ -141,10 +142,18 @@ private boolean containsSuspiciousKeyword(String token) {
141142
.stream()
142143
.map(keyword -> keyword.toLowerCase(Locale.US))
143144
.anyMatch(keyword -> {
145+
// Exact match "^foo$"
146+
if (startsWith(keyword, '^') && endsWith(keyword, '$')) {
147+
return preparedToken.equals(keyword.substring(1, keyword.length() - 1));
148+
}
144149
// Simple regex-inspired syntax "^foo"
145150
if (startsWith(keyword, '^')) {
146151
return preparedToken.startsWith(keyword.substring(1));
147152
}
153+
// Simple regex-inspired syntax "foo$"
154+
if (endsWith(keyword, '$')) {
155+
return preparedToken.endsWith(keyword.substring(0, keyword.length() - 1));
156+
}
148157
return preparedToken.contains(keyword);
149158
});
150159
}
@@ -186,11 +195,26 @@ private static boolean startsWith(CharSequence text, char prefixToTest) {
186195
return !text.isEmpty() && text.charAt(0) == prefixToTest;
187196
}
188197

198+
private static boolean endsWith(CharSequence text, char suffixToTest) {
199+
return !text.isEmpty() && text.charAt(text.length() - 1) == suffixToTest;
200+
}
201+
189202
private static class AnalyseResults {
190203
private boolean pingsEveryone;
191204
private boolean containsSuspiciousKeyword;
192205
private boolean containsDollarSign;
193206
private boolean hasUrl;
194207
private boolean hasSuspiciousUrl;
208+
209+
@Override
210+
public String toString() {
211+
return new StringJoiner(", ", AnalyseResults.class.getSimpleName() + "[", "]")
212+
.add("pingsEveryone=" + pingsEveryone)
213+
.add("containsSuspiciousKeyword=" + containsSuspiciousKeyword)
214+
.add("containsDollarSign=" + containsDollarSign)
215+
.add("hasUrl=" + hasUrl)
216+
.add("hasSuspiciousUrl=" + hasSuspiciousUrl)
217+
.toString();
218+
}
195219
}
196220
}

application/src/test/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetectorTest.java

Lines changed: 125 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -32,17 +32,18 @@ void setUp() {
3232
ScamBlockerConfig scamConfig = mock(ScamBlockerConfig.class);
3333
when(config.getScamBlocker()).thenReturn(scamConfig);
3434

35-
when(scamConfig.getSuspiciousKeywords())
36-
.thenReturn(Set.of("nitro", "boob", "sexy", "sexi", "esex", "steam", "gift", "onlyfans",
37-
"bitcoin", "btc", "promo", "trader", "trading", "whatsapp", "crypto", "claim",
38-
"teen", "adobe", "hack", "steamcommunity", "freenitro", "^earn", ".exe"));
35+
when(scamConfig.getSuspiciousKeywords()).thenReturn(Set.of("nitro", "boob", "sexy", "sexi",
36+
"esex", "steam", "gift", "onlyfans", "bitcoin", "btc", "promo", "trader", "trading",
37+
"whatsapp", "crypto", "^claim", "teen", "adobe", "hack", "steamcommunity",
38+
"freenitro", "^earn$", "^earning", ".exe$"));
3939
when(scamConfig.getHostWhitelist()).thenReturn(Set.of("discord.com", "discord.media",
40-
"discordapp.com", "discordapp.net", "discordstatus.com"));
40+
"discordapp.com", "discordapp.net", "discordstatus.com", "thehackernews.com",
41+
"gradle.org", "help.gradle.org", "youtube.com", "www.youtube.com"));
4142
when(scamConfig.getHostBlacklist()).thenReturn(Set.of("bit.ly", "discord.gg", "teletype.in",
4243
"t.me", "corematrix.us", "u.to", "steamcommunity.com", "goo.su", "telegra.ph",
43-
"shorturl.at", "cheatings.xyz", "transfer.sh"));
44-
when(scamConfig.getSuspiciousHostKeywords())
45-
.thenReturn(Set.of("discord", "nitro", "premium", "free", "cheat", "crypto", "tele"));
44+
"shorturl.at", "cheatings.xyz", "transfer.sh", "tobimoller.space"));
45+
when(scamConfig.getSuspiciousHostKeywords()).thenReturn(Set.of("discord", "nitro",
46+
"premium", "free", "cheat", "crypto", "telegra", "telety"));
4647
when(scamConfig.getIsHostSimilarToKeywordDistanceThreshold()).thenReturn(2);
4748
when(scamConfig.getSuspiciousAttachmentsThreshold())
4849
.thenReturn(SUSPICIOUS_ATTACHMENTS_THRESHOLD);
@@ -335,14 +336,124 @@ private static List<String> provideRealScamMessages() {
335336
Or via TG: https://t.me/Charlie_Adamo
336337
""",
337338
"Urgently looking for mods & collab managers https://discord.gg/cryptohireo",
338-
"Check this - https://transfer.sh/get/ajmkh3l7tzop/Setup.exe");
339+
"Check this - https://transfer.sh/get/ajmkh3l7tzop/Setup.exe",
340+
"""
341+
Secrets of the crypto market that top traders don’t want you to know! I’m looking to help some individuals who
342+
are serious about earning over $100K weekly in the market. Remember, I’ll require just 15% of your profits once
343+
you start seeing earnings. Note: I’m only looking for serious and truly interested individuals.
344+
Text me on TG/WhatApps for more info on how to get started +(123)123-1230 https://t.me/officialjohnsmith""",
345+
"""
346+
💻 Senior Full Stack Engineer | 8+ Years Experience with me
347+
Hi, I’m a Senior Software Engineer with over 8 years of experience building scalable website, cloud-native software solutions across industries like healthcare, fintech, e-commerce, gaming, logistics, and energy.
348+
🧰 Core Skills:
349+
Frontend: React, Vue, Angular, Next.js, TypeScript, Web3 integration, Svelte, Three.js, Pixi.js
350+
Backend: Node.js, NestJS, PHP (Laravel, Symfony), Python (FastAPI/Flask), .Net, Rails
351+
Databases: MongoDB, MySQL, PostgreSQL, Redis
352+
Ecommerce platforms: MedusaJS, MercurJS, Shopify (Gadget)
353+
Automation & Bots: Token Swap / Trading Bots, AI/ML & Generative AI & CRM, Automation online sites
354+
🔍 Notable Projects:
355+
Property Shield: Scalable backend with NestJS, Redis Streams, MongoDB, Supabase
356+
Ready Education: Frontend state architecture with NgRx, Next / Vue, TypeScript with Web3,
357+
Kozoom Multimedia: Secure enterprise login using React, Redux, Azure
358+
B2CWorkflow Builder (React Flow)
359+
📂 Portfolio: https://tobimoller.space/
360+
📬 Open to freelance gigs, contracts, and bounties — let’s talk!""",
361+
"""
362+
I'll help the first 10 people interested on how to start earning $100k or more within a week,
363+
but you will reimburse me 10% of your profits when you receive it. Note: only interested people should
364+
send a friend request or send me a dm! ask me (HOW) via Telegram username @JohnSmith_123""",
365+
"""
366+
Ready to unlock your earning potential in the digital market? you can start earning $100,000 and even more
367+
as a beginner from the digital market, DM me for expert guidance or contact me directly on telegram and start building your financial future.
368+
Telegram username @JohnSmith123""",
369+
"Grab it before it's deleted (available for Windows and macOS): https://www.reddit.com/r/TVBaFreeHub/comments/12345t/ninaatradercrackedfullpowertradingfreefor123/");
339370
}
340371

341372
private static List<String> provideRealFalsePositiveMessages() {
342-
return List
343-
.of("""
344-
https://learn.microsoft.com/en-us/dotnet/csharp/fundamentals/types/anonymous-types""",
345-
"""
346-
And according to quick google search. Median wage is about $23k usd""");
373+
return List.of(
374+
"""
375+
https://learn.microsoft.com/en-us/dotnet/csharp/fundamentals/types/anonymous-types""",
376+
"And according to quick google search. Median wage is about $23k usd",
377+
"""
378+
$ docker image prune -a
379+
WARNING! This will remove all images without at least one container associated to them.
380+
Are you sure you want to continue? [y/N] y
381+
...
382+
Total reclaimed space: 37.73GB""",
383+
"""
384+
Exception in thread "main" java.lang.NoSuchMethodError: 'java.lang.String org.junit.platform.engine.discovery.MethodSelector.getMethodParameterTypes()'
385+
at com.intellij.junit5.JUnit5TestRunnerUtil.loadMethodByReflection(JUnit5TestRunnerUtil.java:127)
386+
at com.intellij.junit5.JUnit5TestRunnerUtil.buildRequest(JUnit5TestRunnerUtil.java:102)
387+
at com.intellij.junit5.JUnit5IdeaTestRunner.startRunnerWithArgs(JUnit5IdeaTestRunner.java:43)
388+
at com.intellij.rt.junit.IdeaTestRunner$Repeater$1.execute(IdeaTestRunner.java:38)
389+
at com.intellij.rt.execution.junit.TestsRepeater.repeat(TestsRepeater.java:11)
390+
at com.intellij.rt.junit.IdeaTestRunner$Repeater.startRunnerWithArgs(IdeaTestRunner.java:35)
391+
at com.intellij.rt.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:232)
392+
at com.intellij.rt.junit.JUnitStarter.main(JUnitStarter.java:55)""",
393+
"""
394+
The average wage here (not the median, which is lower) gives you a take-home of about $68k in New Zealand dollars.
395+
The median house-price in my city (which is not at all the most expensive city) is ~$740k.
396+
That's an 11 year save for an average earner for an average house without spending anything.""",
397+
"https://thehackernews.com/2025/07/alert-exposed-jdwp-interfaces-lead-to.html",
398+
"""
399+
~/Developer/TJ-Bot develop ❯ ./gradlew build 10:20:05 PM
400+
FAILURE: Build failed with an exception.
401+
What went wrong:
402+
class name.remal.gradleplugins.sonarlint.SonarLintPlugin
403+
tried to access private field org.gradle.api.plugins.quality.internal.AbstractCodeQualityPlugin.extension
404+
(name.remal.gradleplugins.sonarlint.SonarLintPlugin is in unnamed module of loader
405+
org.gradle.internal.classloader.VisitableURLClassLoader$InstrumentingVisitableURLClassLoader @55f4c79b;
406+
org.gradle.api.plugins.quality.internal.AbstractCodeQualityPlugin is in unnamed module of
407+
loader org.gradle.initialization.MixInLegacyTypesClassLoader @49b2a47d)
408+
Try:
409+
Run with --stacktrace option to get the stack trace.
410+
Run with --info or --debug option to get more log output.
411+
Run with --scan to get full insights.
412+
Get more help at https://help.gradle.org/.
413+
BUILD FAILED in 795ms
414+
7 actionable tasks: 7 up-to-date
415+
~/Developer/TJ-Bot develop ❯""",
416+
"""
417+
For example. I enter 3.45 for the price and 3 for the count. It results in 10.350000000000001 for some reason. I followed Bro Code's video:
418+
https://www.youtube.com/watch?v=P8CVPIaRmys&list=PLZPZq0rRZOOjNOZYq_R2PECIMglLemc&index=6
419+
and his does not do this. Why is this?
420+
import java.util.Scanner;
421+
public class ShoppingCart {
422+
public static void main(String[] args){
423+
// Shopping Cart Arithmetic Practice
424+
Scanner input = new Scanner(System.in);
425+
String item;
426+
double price;
427+
int count;
428+
char currency = '$';
429+
double total;
430+
System.out.print("What item would you like to buy?: ");
431+
item = input.nextLine();
432+
System.out.print("What is the price of this item?: ");
433+
price = input.nextDouble();
434+
System.out.print("How many " + item + "(s) would you like to buy?: ");
435+
count = input.nextInt();
436+
total = price * count;
437+
System.out.println("\\nYou bought " + count + " " + item + "(s).\\n");
438+
System.out.println("Your total is " + currency + total);
439+
}
440+
}""",
441+
"@squidxtv https://cdn.steamusercontent.com/ugc/12827361819537692968/A7B3AC5A176E7B2287B5E84B9A0BE9754F5A6388/",
442+
"""
443+
today i understood, why security is joke, even for people on top
444+
https://micahsmith.com/ddosecrets-publishes-410-gb-of-heap-dumps-hacked-from-telemessages-archive-server/""",
445+
"""
446+
Hey guys @everyone, apologise for disturbing,
447+
I wanted to ask what's the scope of Java in future like after 2030 in USA, like the newer frameworks will
448+
replace Spring Boot ... and how AI will play it role ...
449+
I am very much confused, what to do, I tired exploring Machine Learning, but I don't know why it felt more
450+
like a burden then enjoyment, but spring boot was fun, although exploring microservice architecture
451+
is was tricky mostly when it came to deployment and it become really confusing...""",
452+
"https://www.cloudflare.com/learning/email-security/dmarc-dkim-spf/",
453+
"""
454+
It was pretty pricey, and the costs likely differ a lot from country to country
455+
(keeping in mind that a portion is importing of equipment to NZ and some is labour in a very different market).
456+
We have 13.5KW of storage, a 10KW inverter, 11.5KW of generation and an EV charger.
457+
All up, on a 1% 'green loan', it was $40k NZD (~$23k USD)""");
347458
}
348459
}

0 commit comments

Comments
 (0)