Skip to content

Commit

Permalink
Fix https: defaulting to port 80 if no port given (#31)
Browse files Browse the repository at this point in the history
  • Loading branch information
samclarke authored Feb 19, 2023
1 parent c49976e commit 2957d9d
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 5 deletions.
12 changes: 7 additions & 5 deletions Robots.js
Original file line number Diff line number Diff line change
Expand Up @@ -274,16 +274,20 @@ function parseUrl(url) {
// The base uses a subdomain that does not exist, so it can never conflict
// with a real site unless someone tries to crawl it; even then, the worst
// that can happen is that relative URLs are allowed on it.
return new URL(url, 'http://robots-relative.samclarke.com/');
var url = new URL(url, 'http://robots-relative.samclarke.com/');

if (!url.port) {
url.port = url.protocol === 'https:' ? 443 : 80;
}

return url;
} catch (e) {
return null;
}
}

function Robots(url, contents) {
this._url = parseUrl(url) || {};
this._url.port = this._url.port || 80;

this._rules = Object.create(null);
this._sitemaps = [];
this._preferredHost = null;
Expand Down Expand Up @@ -361,8 +365,6 @@ Robots.prototype._getRule = function (url, ua) {
var parsedUrl = parseUrl(url) || {};
var userAgent = formatUserAgent(ua || '*');

parsedUrl.port = parsedUrl.port || 80;

// The base URL must match otherwise this robots.txt is not valid for it.
if (
parsedUrl.protocol !== this._url.protocol ||
Expand Down
70 changes: 70 additions & 0 deletions test/Robots.js
Original file line number Diff line number Diff line change
Expand Up @@ -791,4 +791,74 @@ describe('Robots', function () {
// machines running the test, should normally be much less)
expect(end - start).to.be.lessThan(500);
});

it('should honor given port number', function () {
  // The robots.txt under test is served from an explicit, non-default port.
  var robotsTxtUrl = 'http://www.example.com:8080/robots.txt';

  var ruleLines = ['User-agent: *', 'Disallow: /fish/', 'Disallow: /test.html'];

  // Same host, but on port 80 (implied or explicit) instead of 8080.
  // NOTE(review): presumably these are expected to be reported as disallowed
  // because the port differs from the robots.txt URL; confirm against
  // testRobots and the library's origin check.
  var expectedDisallowed = [
    'http://www.example.com/fish',
    'http://www.example.com/Test.html',
    'http://www.example.com:80/fish',
    'http://www.example.com:80/Test.html'
  ];

  // Same port as the robots.txt URL. The paths differ from the Disallow
  // entries: '/fish' has no trailing slash and 'Test.html' differs in case.
  var expectedAllowed = [
    'http://www.example.com:8080/fish',
    'http://www.example.com:8080/Test.html'
  ];

  testRobots(robotsTxtUrl, ruleLines.join('\n'), expectedAllowed, expectedDisallowed);
});

it('should default to port 80 for http: if no port given', function () {
  // The robots.txt URL carries no port, while every URL checked below names
  // one explicitly.
  testRobots(
    'http://www.example.com/robots.txt',
    [
      'User-agent: *',
      'Disallow: /fish/',
      'Disallow: /test.html'
    ].join('\n'),
    // Expected allowed: explicit port 80, with paths that differ from the
    // Disallow entries ('/fish' has no trailing slash, 'Test.html' differs
    // in case).
    [
      'http://www.example.com:80/fish',
      'http://www.example.com:80/Test.html'
    ],
    // Expected disallowed: the first two name port 443 rather than 80; the
    // last three are on port 80 with paths that start with '/fish/' or equal
    // '/test.html'.
    [
      'http://www.example.com:443/fish',
      'http://www.example.com:443/Test.html',
      'http://www.example.com:80/fish/index.php',
      'http://www.example.com:80/fish/',
      'http://www.example.com:80/test.html'
    ]
  );
});

it('should default to port 443 for https: if no port given', function () {
  // The robots.txt URL uses https: and carries no port.
  var contents = [
    'User-agent: *',
    'Disallow: /fish/',
    'Disallow: /test.html'
  ].join('\n');

  // https: URLs with explicit port 443 and with no port at all. The paths
  // differ from the Disallow entries ('/fish' has no trailing slash,
  // 'Test.html' differs in case).
  var allowed = [
    'https://www.example.com:443/fish',
    'https://www.example.com:443/Test.html',
    'https://www.example.com/fish',
    'https://www.example.com/Test.html'
  ];

  var disallowed = [
    // Different scheme (http:) from the robots.txt URL.
    // NOTE(review): presumably expected to be disallowed because the origin
    // does not match, regardless of path; confirm against testRobots.
    'http://www.example.com:80/fish',
    'http://www.example.com:80/Test.html',
    'http://www.example.com:443/fish/index.php',
    'http://www.example.com:443/fish/',
    'http://www.example.com:443/test.html',
    // Same https: origin as the robots.txt URL, with paths that start with
    // '/fish/' or equal '/test.html'. Without these, no entry in this list
    // would check the Disallow rules on the https/443 origin, because every
    // URL above uses http:.
    'https://www.example.com:443/fish/index.php',
    'https://www.example.com:443/fish/',
    'https://www.example.com:443/test.html',
    'https://www.example.com/fish/',
    'https://www.example.com/test.html'
  ];

  testRobots('https://www.example.com/robots.txt', contents, allowed, disallowed);
});
});

0 comments on commit 2957d9d

Please sign in to comment.