diff --git a/Izzy-Moonbot/Helpers/DiscordHelper.cs b/Izzy-Moonbot/Helpers/DiscordHelper.cs index 8dea2585..6924ebc3 100644 --- a/Izzy-Moonbot/Helpers/DiscordHelper.cs +++ b/Izzy-Moonbot/Helpers/DiscordHelper.cs @@ -355,4 +355,11 @@ public static string DisplayName(IIzzyUser user, IIzzyGuild? guild) var member = guild?.GetUser(user.Id); return member != null ? member.DisplayName : user.GlobalName ?? user.Username; } + + // In Discord, <> angle brackets around a url prevent it from being automatically unfurled. + // Izzy often wants to identify urls in a message that *will* unfurl, so we need a reliable way + // to identify urls that aren't enclosed by <>. + // This is essentially https://stackoverflow.com/a/3809435 with added lookaround for <>s. + public static Regex UnfurlableUrl = + new(@"(?)"); } diff --git a/Izzy-Moonbot/Modules/QuotesModule.cs b/Izzy-Moonbot/Modules/QuotesModule.cs index f393672a..f57b6470 100644 --- a/Izzy-Moonbot/Modules/QuotesModule.cs +++ b/Izzy-Moonbot/Modules/QuotesModule.cs @@ -63,7 +63,7 @@ private static string SanitizeQuote(string quote) { // Find all unfurling urls in the given string and surround them with <>. If any are already non-unfurling then ignore them // Regex taken from https://stackoverflow.com/a/3809435 - return Regex.Replace(quote, @"(?)", "<$&>"); + return DiscordHelper.UnfurlableUrl.Replace(quote, "<$&>"); } [Command("quote")] diff --git a/Izzy-Moonbot/Service/SpamService.cs b/Izzy-Moonbot/Service/SpamService.cs index 62f7fcdc..e56a5622 100644 --- a/Izzy-Moonbot/Service/SpamService.cs +++ b/Izzy-Moonbot/Service/SpamService.cs @@ -25,8 +25,6 @@ public class SpamService private readonly TransientState _state; private readonly Regex _mention = new("<@&?[0-9]+>"); - private readonly Regex _url = new("https?://(.+\\.)*(.+)\\.([A-z]+)(/?.+)*", RegexOptions.IgnoreCase); - private readonly Regex _noUnfurlUrl = new("<{1}https?://(.+\\.)*(.+)\\.([A-z]+)(/?.+)*>{1}", RegexOptions.IgnoreCase); /* * The test string is a way to test the spam filter without actually spamming * The test string is programmed to immediately set pressure to Config.SpamMaxPressure. @@ -109,30 +107,9 @@ private void calculateMessagePressureWithoutDecay(RecentMessage message, RecentM } // Check if there's at least one url in the message (and there's no embeds) - if (_url.IsMatch(message.Content) && message.EmbedsCount == 0) + if (DiscordHelper.UnfurlableUrl.IsMatch(message.Content) && message.EmbedsCount == 0) { - // Because url pressure can occur multiple times, we store the pressure to add here - var totalMatches = 0; - - // Go through each "word" because the URL regex is funky - foreach (var content in message.Content.Split(" ")) - { - // Filter out matches - var matches = _url.Matches(content).ToList(); - foreach (Match match in _noUnfurlUrl.Matches(content)) - { - // Check if url is in fact set to not unfurl - var matchToRemove = matches.Find(urlMatch => match.Value.Contains(urlMatch.Value)); - // If not, just continue - if (matchToRemove == null) continue; - - // If it is, remove the match. - matches.Remove(matchToRemove); - } - - totalMatches += matches.Count; - } - + var totalMatches = DiscordHelper.UnfurlableUrl.Count(message.Content); var embedPressure = Math.Round(_config.SpamImagePressure * totalMatches, 2); if (embedPressure > 0.0) { diff --git a/Izzy-MoonbotTests/Tests/DiscordHelperTests.cs b/Izzy-MoonbotTests/Tests/DiscordHelperTests.cs index 2d4636bb..8bda8406 100644 --- a/Izzy-MoonbotTests/Tests/DiscordHelperTests.cs +++ b/Izzy-MoonbotTests/Tests/DiscordHelperTests.cs @@ -220,4 +220,33 @@ public void LevenshteinDistance_Tests() Assert.IsTrue(WithinLevenshteinDistanceOf("SpamMaxPressure", "SpamPressureMax", 6)); Assert.IsTrue(WithinLevenshteinDistanceOf("SpamMaxPressure", "SpamPressureMax", 7)); } + + [TestMethod()] + public void UnfurlableUrlRegex_Tests() + { + Assert.AreEqual(UnfurlableUrl.Count(""), 0); + Assert.AreEqual(UnfurlableUrl.Count("this is normal text"), 0); + + Assert.AreEqual(UnfurlableUrl.Count("http://stackoverflow.com/a/3809435"), 1); + Assert.AreEqual(UnfurlableUrl.Count("https://stackoverflow.com/a/3809435"), 1); + Assert.AreEqual(UnfurlableUrl.Count(""), 0); + Assert.AreEqual(UnfurlableUrl.Count("https://stackoverflow.com/a/3809435 https://stackoverflow.com/a/3809435 https://stackoverflow.com/a/3809435"), 3); + Assert.AreEqual(UnfurlableUrl.Count(" https://stackoverflow.com/a/3809435 "), 1); + Assert.AreEqual(UnfurlableUrl.Count(" "), 0); + + Assert.AreEqual(UnfurlableUrl.Count("htt://stackoverflow.com/a/3809435"), 0); + Assert.AreEqual(UnfurlableUrl.Count("httpss://stackoverflow.com/a/3809435"), 0); + Assert.AreEqual(UnfurlableUrl.Count("https:/stackoverflow.com/a/3809435"), 0); + Assert.AreEqual(UnfurlableUrl.Count("http//stackoverflow.com/a/3809435"), 0); + + Assert.AreEqual(UnfurlableUrl.Count("http://www.stackoverflow.com/a/3809435"), 1); + Assert.AreEqual(UnfurlableUrl.Count("http://w.stackoverflow.com/a/3809435"), 1); + Assert.AreEqual(UnfurlableUrl.Count("http://abc.stackoverflow.com/a/3809435"), 1); + Assert.AreEqual(UnfurlableUrl.Count("http://stackoverflow.com/"), 1); + Assert.AreEqual(UnfurlableUrl.Count("http://stackoverflow.com"), 1); + Assert.AreEqual(UnfurlableUrl.Count("http://stackoverflow.c"), 1); + + Assert.AreEqual(UnfurlableUrl.Count("http://stackoverflow."), 0); + Assert.AreEqual(UnfurlableUrl.Count("http://stackoverflow"), 0); + } }