Skip to content

Commit

Permalink
Fix percent encoding for non-ASCII characters
Browse files Browse the repository at this point in the history
  • Loading branch information
drmathias committed Aug 27, 2023
1 parent 9915b83 commit 22ccadb
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 2 deletions.
19 changes: 17 additions & 2 deletions src/Robots.Txt.Parser/UrlRule.cs
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,8 @@ private static string EncodeUrlPath(string value)

// if (character == '/' || _pChars.Value.Contains(character)) encodedUrlPathBuilder.Append(character);
if (character == '/' || (char.IsAscii(character) && !_reservedChars.Contains(character))) encodedUrlPathBuilder.Append(character);
else encodedUrlPathBuilder.Append(Uri.HexEscape(character));
else encodedUrlPathBuilder.Append(PercentEncode(character));

}

if (pathAndTheRest.Length == 1) return encodedUrlPathBuilder.ToString();
Expand All @@ -158,12 +159,26 @@ private static string EncodeUrlPath(string value)
}

if (char.IsAscii(character) && !_reservedChars.Contains(character)) encodedUrlPathBuilder.Append(character);
else encodedUrlPathBuilder.Append(Uri.HexEscape(character));
else encodedUrlPathBuilder.Append(PercentEncode(character));
}

return encodedUrlPathBuilder.ToString();
}

/// <summary>
/// Percent-encodes a single character: the character is converted to its UTF-8
/// byte sequence and every byte is written as <c>%</c> followed by two uppercase
/// hexadecimal digits (for example <c>ツ</c> becomes <c>%E3%83%84</c>).
/// </summary>
/// <param name="value">The UTF-16 code unit to encode.</param>
/// <returns>The percent-encoded form, three characters per UTF-8 byte.</returns>
/// <remarks>
/// NOTE(review): a lone surrogate cannot be converted to UTF-8 on its own, so
/// <see cref="Encoding.UTF8"/> substitutes U+FFFD and the result is
/// <c>%EF%BF%BD</c>. Confirm whether callers can pass characters outside the BMP
/// one code unit at a time.
/// </remarks>
private static string PercentEncode(char value)
{
    const string hexDigits = "0123456789ABCDEF";

    var utf8Bytes = Encoding.UTF8.GetBytes(value.ToString());
    var encoded = new char[utf8Bytes.Length * 3];
    var position = 0;

    foreach (var octet in utf8Bytes)
    {
        encoded[position++] = '%';
        encoded[position++] = hexDigits[octet >> 4];   // high nibble
        encoded[position++] = hexDigits[octet & 0xF];  // low nibble
    }

    return new string(encoded);
}

private static string DecodePercentEncodedUnreservedCharacters(string value)
{
foreach (var percentEncoding in _unreservedCharactersPercentEncoded)
Expand Down
41 changes: 41 additions & 0 deletions tests/Robots.Txt.Parser.Tests.Unit/UrlRuleTests.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using System;
using System.Text;
using FluentAssertions;
using Xunit;

Expand Down Expand Up @@ -342,4 +344,43 @@ public void Matches_UnescapedQueryStringInPathButRuleEscaped_ReturnTrue()
// Assert
matches.Should().Be(true);
}

[Fact]
public void Matches_ExistingEscapedUtf8Character_ReturnTrue()
{
    // Arrange: the rule and the candidate path use the same percent-encoded UTF-8 sequence
    const string escapedPath = "/foo/bar/%E3%83%84";
    var rule = new UrlRule(RuleType.Disallow, escapedPath);

    // Act
    var isMatch = rule.Pattern.Matches(escapedPath);

    // Assert
    isMatch.Should().Be(true);
}

[Fact]
public void Matches_ExistingEscapedUtf8CharacterRuleOnly_ReturnTrue()
{
    // Arrange: the rule holds the raw character, the candidate path holds its percent-encoded form
    const string rawRulePath = "/foo/bar/ツ";
    const string escapedPath = "/foo/bar/%E3%83%84";
    var rule = new UrlRule(RuleType.Disallow, rawRulePath);

    // Act
    var isMatch = rule.Pattern.Matches(escapedPath);

    // Assert
    isMatch.Should().Be(true);
}

[Fact]
public void Matches_ExistingEscapedUtf8CharacterPathOnly_ReturnTrue()
{
    // Arrange: the rule holds the percent-encoded form, the candidate path holds the raw character
    const string escapedRulePath = "/foo/bar/%E3%83%84";
    const string rawPath = "/foo/bar/ツ";
    var rule = new UrlRule(RuleType.Disallow, escapedRulePath);

    // Act
    var isMatch = rule.Pattern.Matches(rawPath);

    // Assert
    isMatch.Should().Be(true);
}
}

0 comments on commit 22ccadb

Please sign in to comment.