Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use a single high-quality, well-tested unfurlable url regex for both places Izzy needs this check #527

Merged
merged 3 commits into from
Nov 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Izzy-Moonbot/Helpers/DiscordHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -355,4 +355,11 @@ public static string DisplayName(IIzzyUser user, IIzzyGuild? guild)
var member = guild?.GetUser(user.Id);
return member != null ? member.DisplayName : user.GlobalName ?? user.Username;
}

// In Discord, <> angle brackets around a url prevent it from being automatically unfurled.
// Izzy often wants to identify urls in a message that *will* unfurl, so we need a reliable way
// to identify urls that aren't enclosed by <>.
// This is essentially https://stackoverflow.com/a/3809435 with added lookaround for <>s.
public static Regex UnfurlableUrl =
new(@"(?<!<)(https?://(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&//=]*))(?!>)");
}
2 changes: 1 addition & 1 deletion Izzy-Moonbot/Modules/QuotesModule.cs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ private static string SanitizeQuote(string quote)
{
// Find all unfurling urls in the given string and surround them with <>. If any are already non-unfurling then ignore them
// Regex taken from https://stackoverflow.com/a/3809435
return Regex.Replace(quote, @"(?<!<)(https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*))(?!>)", "<$&>");
return DiscordHelper.UnfurlableUrl.Replace(quote, "<$&>");
}

[Command("quote")]
Expand Down
27 changes: 2 additions & 25 deletions Izzy-Moonbot/Service/SpamService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ public class SpamService
private readonly TransientState _state;

private readonly Regex _mention = new("<@&?[0-9]+>");
private readonly Regex _url = new("https?://(.+\\.)*(.+)\\.([A-z]+)(/?.+)*", RegexOptions.IgnoreCase);
private readonly Regex _noUnfurlUrl = new("<{1}https?://(.+\\.)*(.+)\\.([A-z]+)(/?.+)*>{1}", RegexOptions.IgnoreCase);
/*
* The test string is a way to test the spam filter without actually spamming
* The test string is programmed to immediately set pressure to Config.SpamMaxPressure.
Expand Down Expand Up @@ -109,30 +107,9 @@ private void calculateMessagePressureWithoutDecay(RecentMessage message, RecentM
}

// Check if there's at least one url in the message (and there's no embeds)
if (_url.IsMatch(message.Content) && message.EmbedsCount == 0)
if (DiscordHelper.UnfurlableUrl.IsMatch(message.Content) && message.EmbedsCount == 0)
{
// Because url pressure can occur multiple times, we store the pressure to add here
var totalMatches = 0;

// Go through each "word" because the URL regex is funky
foreach (var content in message.Content.Split(" "))
{
// Filter out matches
var matches = _url.Matches(content).ToList();
foreach (Match match in _noUnfurlUrl.Matches(content))
{
// Check if url is in fact set to not unfurl
var matchToRemove = matches.Find(urlMatch => match.Value.Contains(urlMatch.Value));
// If not, just continue
if (matchToRemove == null) continue;

// If it is, remove the match.
matches.Remove(matchToRemove);
}

totalMatches += matches.Count;
}

var totalMatches = DiscordHelper.UnfurlableUrl.Count(message.Content);
var embedPressure = Math.Round(_config.SpamImagePressure * totalMatches, 2);
if (embedPressure > 0.0)
{
Expand Down
29 changes: 29 additions & 0 deletions Izzy-MoonbotTests/Tests/DiscordHelperTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -220,4 +220,33 @@ public void LevenshteinDistance_Tests()
Assert.IsTrue(WithinLevenshteinDistanceOf("SpamMaxPressure", "SpamPressureMax", 6));
Assert.IsTrue(WithinLevenshteinDistanceOf("SpamMaxPressure", "SpamPressureMax", 7));
}

[TestMethod()]
public void UnfurlableUrlRegex_Tests()
{
Assert.AreEqual(UnfurlableUrl.Count(""), 0);
Assert.AreEqual(UnfurlableUrl.Count("this is normal text"), 0);

Assert.AreEqual(UnfurlableUrl.Count("http://stackoverflow.com/a/3809435"), 1);
Assert.AreEqual(UnfurlableUrl.Count("https://stackoverflow.com/a/3809435"), 1);
Assert.AreEqual(UnfurlableUrl.Count("<https://stackoverflow.com/a/3809435>"), 0);
Assert.AreEqual(UnfurlableUrl.Count("https://stackoverflow.com/a/3809435 https://stackoverflow.com/a/3809435 https://stackoverflow.com/a/3809435"), 3);
Assert.AreEqual(UnfurlableUrl.Count("<https://stackoverflow.com/a/3809435> https://stackoverflow.com/a/3809435 <https://stackoverflow.com/a/3809435>"), 1);
Assert.AreEqual(UnfurlableUrl.Count("<https://stackoverflow.com/a/3809435> <https://stackoverflow.com/a/3809435> <https://stackoverflow.com/a/3809435>"), 0);

Assert.AreEqual(UnfurlableUrl.Count("htt://stackoverflow.com/a/3809435"), 0);
Assert.AreEqual(UnfurlableUrl.Count("httpss://stackoverflow.com/a/3809435"), 0);
Assert.AreEqual(UnfurlableUrl.Count("https:/stackoverflow.com/a/3809435"), 0);
Assert.AreEqual(UnfurlableUrl.Count("http//stackoverflow.com/a/3809435"), 0);

Assert.AreEqual(UnfurlableUrl.Count("http://www.stackoverflow.com/a/3809435"), 1);
Assert.AreEqual(UnfurlableUrl.Count("http://w.stackoverflow.com/a/3809435"), 1);
Assert.AreEqual(UnfurlableUrl.Count("http://abc.stackoverflow.com/a/3809435"), 1);
Assert.AreEqual(UnfurlableUrl.Count("http://stackoverflow.com/"), 1);
Assert.AreEqual(UnfurlableUrl.Count("http://stackoverflow.com"), 1);
Assert.AreEqual(UnfurlableUrl.Count("http://stackoverflow.c"), 1);

Assert.AreEqual(UnfurlableUrl.Count("http://stackoverflow."), 0);
Assert.AreEqual(UnfurlableUrl.Count("http://stackoverflow"), 0);
}
}
Loading