diff --git a/LinkCrawler/LinkCrawler.Tests/UtilsTests/ExtensionsTests/StringExtensionsTests.cs b/LinkCrawler/LinkCrawler.Tests/UtilsTests/ExtensionsTests/StringExtensionsTests.cs index 392bec1..2759377 100644 --- a/LinkCrawler/LinkCrawler.Tests/UtilsTests/ExtensionsTests/StringExtensionsTests.cs +++ b/LinkCrawler/LinkCrawler.Tests/UtilsTests/ExtensionsTests/StringExtensionsTests.cs @@ -118,5 +118,14 @@ public void StartsWithIgnoreCase_DifferentLetterAndSameCase_True() var result = word.StartsWithIgnoreCase(letter); Assert.AreEqual(false, result); } + + [Test] + public void StartsWithIgnoreCase_NullAndNull_True() + { + string word = null; + string letter = null; + var result = StringExtensions.StartsWithIgnoreCase(null, null); + Assert.AreEqual(true, result); + } } } diff --git a/LinkCrawler/LinkCrawler.Tests/UtilsTests/HelpersTests/ValidUrlParserTests.cs b/LinkCrawler/LinkCrawler.Tests/UtilsTests/HelpersTests/ValidUrlParserTests.cs index b19c52f..b1652c0 100644 --- a/LinkCrawler/LinkCrawler.Tests/UtilsTests/HelpersTests/ValidUrlParserTests.cs +++ b/LinkCrawler/LinkCrawler.Tests/UtilsTests/HelpersTests/ValidUrlParserTests.cs @@ -8,6 +8,7 @@ namespace LinkCrawler.Tests.UtilsTests.HelpersTests public class ValidUrlParserTests { public ValidUrlParser ValidUrlParser { get; set; } + [SetUp] public void SetUp() { @@ -42,9 +43,58 @@ public void Parse_UrlOnlyRelativePath_True() string parsed; var result = ValidUrlParser.Parse(relativeUrl, out parsed); Assert.That(result, Is.True); - var validUrl = string.Format("{0}{1}",ValidUrlParser.BaseUrl, relativeUrl); + var validUrl = string.Format("{0}{1}", ValidUrlParser.BaseUrl, relativeUrl); Assert.That(parsed, Is.EqualTo(validUrl)); } + + [Test] + public void When_the_url_is_empty_then_it_is_not_a_valid_url() + { + string url; + Assert.IsFalse(ValidUrlParser.Parse("", out url)); + } + + [Test] + public void A_url_that_does_not_match_the_ValidUrlRegex_is_no_valid_url() + { + string url; + + 
Assert.AreEqual(@"(^http[s]?:\/{2})|(^www)|(^\/{1,2})", new Settings().ValidUrlRegex, + "This test is coded against this Regex. A change in the config could make it invalid."); + Assert.IsFalse(ValidUrlParser.Parse("ftp://invalid.url", out url)); + } + + [Test] + public void An_absolute_http_url_will_be_parsed_and_not_be_changed() + { + string url; + Assert.IsTrue(ValidUrlParser.Parse("http://www.google.de", out url)); + Assert.AreEqual("http://www.google.de", url); + } + + [Test] + public void A_relative_url_starting_with_a_slash_will_be_expanded_to_an_absolute_url() + { + string url; + Assert.AreEqual("https://github.com", new Settings().BaseUrl, "This test is coded against a configuration using this base url and will fail if the configuration is changed."); + Assert.IsTrue(ValidUrlParser.Parse("/oml", out url)); + Assert.AreEqual("https://github.com/oml", url); + } + + [Test] + public void An_url_without_a_scheme_will_get_http_prepended() + { + string url; + Assert.IsTrue(ValidUrlParser.Parse("//google.com", out url)); + Assert.AreEqual("http://google.com", url); + } + + [Test] + public void A_relative_url_not_starting_with_a_slash_will_not_be_parsed() + { + string url; + Assert.IsFalse(ValidUrlParser.Parse("index.html", out url)); + } } } diff --git a/LinkCrawler/LinkCrawler/Utils/Parsers/ValidUrlParser.cs b/LinkCrawler/LinkCrawler/Utils/Parsers/ValidUrlParser.cs index 54d9097..bbecc8b 100644 --- a/LinkCrawler/LinkCrawler/Utils/Parsers/ValidUrlParser.cs +++ b/LinkCrawler/LinkCrawler/Utils/Parsers/ValidUrlParser.cs @@ -5,6 +5,13 @@ namespace LinkCrawler.Utils.Parsers { + /// <summary> + /// Parses a given text to validate if it is a valid url. + /// + /// Some kinds of relative URLs are converted to absolute urls. + /// You can count on either getting a valid absolute url from this + /// class or getting false as the return value. 
+ /// </summary> public class ValidUrlParser : IValidUrlParser { public Regex Regex { get; set; } @@ -34,18 +41,21 @@ public bool Parse(string url, out string validUrl) validUrl = url; return true; } + if (url.StartsWith("//")) { var newUrl = string.Concat("http:", url); validUrl = newUrl; return true; } + if (url.StartsWith("/")) { var newUrl = string.Concat(BaseUrl, url); validUrl = newUrl; return true; } + return false; } }