Skip to content

Commit

Permalink
Rollup merge of #126057 - Sunshine40:rustdoc-search-non-english, r=no…
Browse files Browse the repository at this point in the history
…triddle

Make HTML rendered by rustdoc allow searching for non-English identifiers and aliases

Fix alias search result showing `undefined` description.

Inspired by rust-lang/mdBook#2393 .

Not sure whether it's worth adding full-text search functionality to rustdoc-rendered HTML.
  • Loading branch information
fmease authored Jun 8, 2024
2 parents ba31a0a + ceaa42b commit 1f715eb
Show file tree
Hide file tree
Showing 12 changed files with 362 additions and 85 deletions.
4 changes: 2 additions & 2 deletions src/ci/docker/host-x86_64/mingw-check/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ ENV SCRIPT python3 ../x.py check --stage 0 --set build.optimized-compiler-builti
/scripts/validate-toolstate.sh && \
/scripts/validate-error-codes.sh && \
reuse --include-submodules lint && \
# Runs checks to ensure that there are no ES5 issues in our JS code.
es-check es8 ../src/librustdoc/html/static/js/*.js && \
# Runs checks to ensure that there are no issues in our JS code.
es-check es2019 ../src/librustdoc/html/static/js/*.js && \
eslint -c ../src/librustdoc/html/static/.eslintrc.js ../src/librustdoc/html/static/js/*.js && \
eslint -c ../src/tools/rustdoc-js/.eslintrc.js ../src/tools/rustdoc-js/tester.js && \
eslint -c ../src/tools/rustdoc-gui/.eslintrc.js ../src/tools/rustdoc-gui/tester.js
2 changes: 1 addition & 1 deletion src/librustdoc/html/static/.eslintrc.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ module.exports = {
},
"extends": "eslint:recommended",
"parserOptions": {
"ecmaVersion": 8,
"ecmaVersion": 2019,
"sourceType": "module"
},
"rules": {
Expand Down
3 changes: 2 additions & 1 deletion src/librustdoc/html/static/js/externs.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,9 @@ let ParserState;
* foundElems: number,
* totalElems: number,
* literalSearch: boolean,
* corrections: Array<{from: string, to: integer}>,
* corrections: Array<{from: string, to: integer}> | null,
* typeFingerprint: Uint32Array,
* error: Array<string> | null,
* }}
*/
let ParsedQuery;
Expand Down
156 changes: 85 additions & 71 deletions src/librustdoc/html/static/js/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ const ROOT_PATH = typeof window !== "undefined" ? window.rootPath : "../";
// of permutations we need to check.
const UNBOXING_LIMIT = 5;

// Used for search query verification.
//
// Matches a single identifier: either an `ID_Start` character followed by any
// number of `ID_Continue` characters, or `_` followed by at least one
// `ID_Continue` character (so `_` on its own is not matched). The `y` (sticky)
// flag makes the match start exactly at the regex's `lastIndex`.
const REGEX_IDENT = /\p{ID_Start}\p{ID_Continue}*|_\p{ID_Continue}+/uy;
// Matches a character outside the range `a`-`z`, compared case-insensitively.
const REGEX_INVALID_TYPE_FILTER = /[^a-z]/ui;

// In the search display, allows switching between tabs.
function printTab(nb) {
let iter = 0;
Expand Down Expand Up @@ -410,18 +414,21 @@ function initSearch(rawSearchIndex) {
}

/**
* Returns `true` if the given `c` character is valid for an ident.
* If the current parser position is at the beginning of an identifier,
* move the position to the end of it and return `true`. Otherwise, return `false`.
*
* @param {string} c
* @param {ParserState} parserState
*
* @return {boolean}
*/
function isIdentCharacter(c) {
return (
c === "_" ||
(c >= "0" && c <= "9") ||
(c >= "a" && c <= "z") ||
(c >= "A" && c <= "Z"));
function consumeIdent(parserState) {
    // `REGEX_IDENT` is sticky: setting `lastIndex` makes the match succeed only
    // if an identifier begins exactly at the current parser position.
    REGEX_IDENT.lastIndex = parserState.pos;
    const identMatch = parserState.userQuery.match(REGEX_IDENT);
    if (!identMatch) {
        // Not at the start of an identifier: leave the position untouched.
        return false;
    }
    // Advance past the whole identifier that was just recognised.
    parserState.pos += identMatch[0].length;
    return true;
}

/**
Expand Down Expand Up @@ -618,70 +625,62 @@ function initSearch(rawSearchIndex) {
* @return {integer}
*/
function getIdentEndPosition(parserState) {
const start = parserState.pos;
let afterIdent = consumeIdent(parserState);
let end = parserState.pos;
let foundExclamation = -1;
let macroExclamation = -1;
while (parserState.pos < parserState.length) {
const c = parserState.userQuery[parserState.pos];
if (!isIdentCharacter(c)) {
if (c === "!") {
if (foundExclamation !== -1) {
throw ["Cannot have more than one ", "!", " in an ident"];
} else if (parserState.pos + 1 < parserState.length &&
isIdentCharacter(parserState.userQuery[parserState.pos + 1])
) {
if (c === "!") {
if (macroExclamation !== -1) {
throw ["Cannot have more than one ", "!", " in an ident"];
} else if (parserState.pos + 1 < parserState.length) {
const pos = parserState.pos;
parserState.pos++;
const beforeIdent = consumeIdent(parserState);
parserState.pos = pos;
if (beforeIdent) {
throw ["Unexpected ", "!", ": it can only be at the end of an ident"];
}
foundExclamation = parserState.pos;
} else if (isPathSeparator(c)) {
if (c === ":") {
if (!isPathStart(parserState)) {
}
if (afterIdent) macroExclamation = parserState.pos;
} else if (isPathSeparator(c)) {
if (c === ":") {
if (!isPathStart(parserState)) {
break;
}
// Skip current ":".
parserState.pos += 1;
} else {
while (parserState.pos + 1 < parserState.length) {
const next_c = parserState.userQuery[parserState.pos + 1];
if (next_c !== " ") {
break;
}
// Skip current ":".
parserState.pos += 1;
} else {
while (parserState.pos + 1 < parserState.length) {
const next_c = parserState.userQuery[parserState.pos + 1];
if (next_c !== " ") {
break;
}
parserState.pos += 1;
}
}
if (foundExclamation !== -1) {
if (foundExclamation !== start &&
isIdentCharacter(parserState.userQuery[foundExclamation - 1])
) {
throw ["Cannot have associated items in macros"];
} else {
// while the never type has no associated macros, we still
// can parse a path like that
foundExclamation = -1;
}
}
} else if (
c === "[" ||
c === "(" ||
isEndCharacter(c) ||
isSpecialStartCharacter(c) ||
isSeparatorCharacter(c)
) {
break;
} else if (parserState.pos > 0) {
throw ["Unexpected ", c, " after ", parserState.userQuery[parserState.pos - 1]];
} else {
throw ["Unexpected ", c];
}
if (macroExclamation !== -1) {
throw ["Cannot have associated items in macros"];
}
} else if (
c === "[" ||
c === "(" ||
isEndCharacter(c) ||
isSpecialStartCharacter(c) ||
isSeparatorCharacter(c)
) {
break;
} else if (parserState.pos > 0) {
throw ["Unexpected ", c, " after ", parserState.userQuery[parserState.pos - 1],
" (not a valid identifier)"];
} else {
throw ["Unexpected ", c, " (not a valid identifier)"];
}
parserState.pos += 1;
afterIdent = consumeIdent(parserState);
end = parserState.pos;
}
// if start == end - 1, we got the never type
if (foundExclamation !== -1 &&
foundExclamation !== start &&
isIdentCharacter(parserState.userQuery[foundExclamation - 1])
) {
if (macroExclamation !== -1) {
if (parserState.typeFilter === null) {
parserState.typeFilter = "macro";
} else if (parserState.typeFilter !== "macro") {
Expand All @@ -693,7 +692,7 @@ function initSearch(rawSearchIndex) {
" both specified",
];
}
end = foundExclamation;
end = macroExclamation;
}
return end;
}
Expand Down Expand Up @@ -1071,16 +1070,15 @@ function initSearch(rawSearchIndex) {
function checkExtraTypeFilterCharacters(start, parserState) {
const query = parserState.userQuery.slice(start, parserState.pos).trim();

for (const c in query) {
if (!isIdentCharacter(query[c])) {
throw [
"Unexpected ",
query[c],
" in type filter (before ",
":",
")",
];
}
const match = query.match(REGEX_INVALID_TYPE_FILTER);
if (match) {
throw [
"Unexpected ",
match[0],
" in type filter (before ",
":",
")",
];
}
}

Expand Down Expand Up @@ -2127,7 +2125,7 @@ function initSearch(rawSearchIndex) {
};
}

function handleAliases(ret, query, filterCrates, currentCrate) {
async function handleAliases(ret, query, filterCrates, currentCrate) {
const lowerQuery = query.toLowerCase();
// We separate aliases and crate aliases because we want to have current crate
// aliases to be before the others in the displayed results.
Expand Down Expand Up @@ -2163,6 +2161,15 @@ function initSearch(rawSearchIndex) {
crateAliases.sort(sortFunc);
aliases.sort(sortFunc);

const fetchDesc = alias => {
return searchIndexEmptyDesc.get(alias.crate).contains(alias.bitIndex) ?
"" : searchState.loadDesc(alias);
};
const [crateDescs, descs] = await Promise.all([
Promise.all(crateAliases.map(fetchDesc)),
Promise.all(aliases.map(fetchDesc)),
]);

const pushFunc = alias => {
alias.alias = query;
const res = buildHrefAndPath(alias);
Expand All @@ -2176,7 +2183,13 @@ function initSearch(rawSearchIndex) {
}
};

aliases.forEach((alias, i) => {
alias.desc = descs[i];
});
aliases.forEach(pushFunc);
crateAliases.forEach((alias, i) => {
alias.desc = crateDescs[i];
});
crateAliases.forEach(pushFunc);
}

Expand Down Expand Up @@ -2538,7 +2551,8 @@ function initSearch(rawSearchIndex) {
sorted_returned,
sorted_others,
parsedQuery);
handleAliases(ret, parsedQuery.original.replace(/"/g, ""), filterCrates, currentCrate);
await handleAliases(ret, parsedQuery.original.replace(/"/g, ""),
filterCrates, currentCrate);
await Promise.all([ret.others, ret.returned, ret.in_args].map(async list => {
const descs = await Promise.all(list.map(result => {
return searchIndexEmptyDesc.get(result.crate).contains(result.bitIndex) ?
Expand Down
2 changes: 1 addition & 1 deletion src/tools/rustdoc-gui/.eslintrc.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ module.exports = {
},
"extends": "eslint:recommended",
"parserOptions": {
"ecmaVersion": 2018,
"ecmaVersion": 2019,
"sourceType": "module"
},
"rules": {
Expand Down
2 changes: 1 addition & 1 deletion src/tools/rustdoc-js/.eslintrc.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ module.exports = {
},
"extends": "eslint:recommended",
"parserOptions": {
"ecmaVersion": 8,
"ecmaVersion": 2019,
"sourceType": "module"
},
"rules": {
Expand Down
23 changes: 16 additions & 7 deletions tests/rustdoc-js-std/parser-errors.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ const PARSED = [
original: "-> *",
returned: [],
userQuery: "-> *",
error: "Unexpected `*` after ` `",
error: "Unexpected `*` after ` ` (not a valid identifier)",
},
{
query: 'a<"P">',
Expand Down Expand Up @@ -204,16 +204,25 @@ const PARSED = [
original: "_:",
returned: [],
userQuery: "_:",
error: "Unexpected `:` (expected path after type filter `_:`)",
error: "Unexpected `_` (not a valid identifier)",
},
{
query: "_:a",
query: "ab:",
elems: [],
foundElems: 0,
original: "_:a",
original: "ab:",
returned: [],
userQuery: "_:a",
error: "Unknown type filter `_`",
userQuery: "ab:",
error: "Unexpected `:` (expected path after type filter `ab:`)",
},
{
query: "a:b",
elems: [],
foundElems: 0,
original: "a:b",
returned: [],
userQuery: "a:b",
error: "Unknown type filter `a`",
},
{
query: "a-bb",
Expand All @@ -240,7 +249,7 @@ const PARSED = [
original: "ab'",
returned: [],
userQuery: "ab'",
error: "Unexpected `'` after `b`",
error: "Unexpected `'` after `b` (not a valid identifier)",
},
{
query: "a->",
Expand Down
2 changes: 1 addition & 1 deletion tests/rustdoc-js/basic.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
const EXPECTED = {
'query': 'Fo',
'others': [
{ 'path': 'basic', 'name': 'Foo' },
{ 'path': 'basic', 'name': 'Foo', 'desc': 'Docs for Foo' },
],
};
Loading

0 comments on commit 1f715eb

Please sign in to comment.