Skip to content

Commit

Permalink
Fixed bug in base URL when specified at the command line
Browse files (browse the repository at this point in the history)
  • Loading branch information
ed-graham committed Sep 16, 2018
1 parent b2cda45 commit 76d101a
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 9 deletions.
2 changes: 1 addition & 1 deletion LinkCrawler/LinkCrawler/App.config
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
<add key="CheckImages" value="true"/>
<add key="BaseUrl" value="https://github.com"/>
<add key="SuccessHttpStatusCodes" value="1xx,2xx,3xx"/>
<add key="InterestingHttpStatusCodes" value="4xx,9xx"/>
<add key="InterestingHttpStatusCodes" value="*"/>
<!--explanation of regex below: http://regexr.com/3cqt9 -->
<add key="ValidUrlRegex" value="(^http[s]?:\/{2})|(^www)|(^\/{1,2})"/>
<add key="PrintSummary" value="true"/>
Expand Down
29 changes: 28 additions & 1 deletion LinkCrawler/LinkCrawler/LinkCrawler.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,21 @@
<FileAlignment>512</FileAlignment>
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
<TargetFrameworkProfile />
<PublishUrl>publish\</PublishUrl>
<Install>true</Install>
<InstallFrom>Disk</InstallFrom>
<UpdateEnabled>false</UpdateEnabled>
<UpdateMode>Foreground</UpdateMode>
<UpdateInterval>7</UpdateInterval>
<UpdateIntervalUnits>Days</UpdateIntervalUnits>
<UpdatePeriodically>false</UpdatePeriodically>
<UpdateRequired>false</UpdateRequired>
<MapFileExtensions>true</MapFileExtensions>
<ApplicationRevision>0</ApplicationRevision>
<ApplicationVersion>1.0.0.%2a</ApplicationVersion>
<IsWebBootstrapper>false</IsWebBootstrapper>
<UseApplicationTrust>false</UseApplicationTrust>
<BootstrapperEnabled>true</BootstrapperEnabled>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
Expand Down Expand Up @@ -64,6 +79,7 @@
<Reference Include="System.Configuration" />
<Reference Include="System.Core" />
<Reference Include="System.Web" />
<Reference Include="System.Web.Extensions" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
Expand Down Expand Up @@ -107,7 +123,18 @@
</None>
<None Include="packages.config" />
</ItemGroup>
<ItemGroup />
<ItemGroup>
<BootstrapperPackage Include=".NETFramework,Version=v4.6.1">
<Visible>False</Visible>
<ProductName>Microsoft .NET Framework 4.6.1 %28x86 and x64%29</ProductName>
<Install>true</Install>
</BootstrapperPackage>
<BootstrapperPackage Include="Microsoft.Net.Framework.3.5.SP1">
<Visible>False</Visible>
<ProductName>.NET Framework 3.5 SP1</ProductName>
<Install>false</Install>
</BootstrapperPackage>
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
Expand Down
6 changes: 4 additions & 2 deletions LinkCrawler/LinkCrawler/Models/ResponseModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,15 @@ public override string ToString()
{
if (!IsSuccess)
{
// cater for HTTP "999" codes returned
string statusCode = StatusCodeNumber == 999 ? "[Request denied]" : StatusCode.ToString();
if (!String.IsNullOrEmpty(ErrorMessage))
{
return $"{StatusCodeNumber}\t{StatusCode}\t{RequestedUrl}{Environment.NewLine}\tError:\t{ErrorMessage}{Environment.NewLine}\tReferer:\t{ReferrerUrl}";
return $"{StatusCodeNumber}\t{statusCode}\t{RequestedUrl}{Environment.NewLine}\tError:\t{ErrorMessage}{Environment.NewLine}\tReferer:\t{ReferrerUrl}";
}
else
{
return $"{StatusCodeNumber}\t{StatusCode}\t{RequestedUrl}{Environment.NewLine}\tReferer:\t{ReferrerUrl}";
return $"{StatusCodeNumber}\t{statusCode}\t{RequestedUrl}{Environment.NewLine}\tReferer:\t{ReferrerUrl}";
}
}
else
Expand Down
20 changes: 15 additions & 5 deletions LinkCrawler/LinkCrawler/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,25 @@ static void Main(string[] args)
using (var container = Container.For<StructureMapRegistry>())
{
var linkCrawler = container.GetInstance<LinkCrawler>();
if (args.Length >0)
if (args.Length > 0)
{
string parsed;
var validUrlParser = new ValidUrlParser(new Settings());
var result = validUrlParser.Parse(args[0], out parsed);
if(result)
linkCrawler.BaseUrl = parsed;
var result = validUrlParser.Parse(args[0], out string parsed);
if (result)
{
// make sure the base URL is just a domain
int prefixLength = parsed.IndexOf("//") + 2;
if (parsed.Substring(prefixLength).IndexOf("/") > 0)
{
parsed = parsed.Substring(0, parsed.Substring(prefixLength).IndexOf("/") + prefixLength);
}
linkCrawler.BaseUrl = parsed;
validUrlParser.BaseUrl = parsed;
linkCrawler.ValidUrlParser = validUrlParser;
}
}
linkCrawler.Start();
// this line *has* to be here in order to flush the output to the console, otherwise we don't see anything - why?
Console.Read();
}
}
Expand Down

0 comments on commit 76d101a

Please sign in to comment.