Skip to content

Commit

Permalink
AG-37643 Convert ABP's rules to AdGuard syntax.
Browse files Browse the repository at this point in the history
Squashed commit of the following:

commit a709d71
Merge: 9502b10 e64bdd1
Author: jellizaveta <[email protected]>
Date:   Wed Dec 18 15:40:51 2024 +0300

    Merge branch 'fix/AG-37643' of ssh://bit.int.agrd.dev:7999/adguard-filters/tsurlfilter into fix/AG-37643

commit 9502b10
Author: jellizaveta <[email protected]>
Date:   Wed Dec 18 15:40:27 2024 +0300

    add jsDoc comment

commit e64bdd1
Author: Dávid Tóta <[email protected]>
Date:   Wed Dec 18 14:35:57 2024 +0300

    Applied suggestion

commit 89715a4
Author: jellizaveta <[email protected]>
Date:   Wed Dec 18 12:51:02 2024 +0300

    add test, fix chngelog

commit 6c57008
Author: Slava Leleka <[email protected]>
Date:   Wed Dec 18 12:50:03 2024 +0300

    Applied suggestion

commit 07b2ad4
Author: Slava Leleka <[email protected]>
Date:   Wed Dec 18 12:49:56 2024 +0300

    Applied suggestion

commit 00342b6
Author: jellizaveta <[email protected]>
Date:   Mon Dec 16 12:44:47 2024 +0300

    fix changelog, styles

commit 8496d6a
Author: Dávid Tóta <[email protected]>
Date:   Mon Dec 16 12:46:49 2024 +0300

    Applied suggestion

commit 9fe7dcf
Author: jellizaveta <[email protected]>
Date:   Mon Dec 16 11:39:23 2024 +0300

    fix jsDoc

commit 06ae9b8
Author: Slava Leleka <[email protected]>
Date:   Mon Dec 16 11:24:48 2024 +0300

    Applied suggestion

commit 959f8dd
Author: jellizaveta <[email protected]>
Date:   Fri Dec 13 19:18:11 2024 +0300

    fix naming in tests

commit 429c699
Author: jellizaveta <[email protected]>
Date:   Fri Dec 13 19:13:01 2024 +0300

    update version to 2.3.0

commit 3c7b8e2
Author: jellizaveta <[email protected]>
Date:   Fri Dec 13 19:06:54 2024 +0300

    eslint

commit a38ad21
Author: jellizaveta <[email protected]>
Date:   Fri Dec 13 19:06:01 2024 +0300

    add jsDocs

commit 1dab9d0
Author: jellizaveta <[email protected]>
Date:   Fri Dec 13 19:04:19 2024 +0300

    fix the latest tests

commit 1a93eca
Author: scripthunter7 <[email protected]>
Date:   Fri Dec 13 17:01:00 2024 +0100

    fix one test

commit 004f3f4
Author: scripthunter7 <[email protected]>
Date:   Fri Dec 13 17:00:38 2024 +0100

    add util method for last slot

commit 21ab05e
Author: scripthunter7 <[email protected]>
Date:   Fri Dec 13 17:00:16 2024 +0100

    fix error offsets in css token stream constructor

commit 071fca7
Merge: c6f6196 2db9ade
Author: jellizaveta <[email protected]>
Date:   Fri Dec 13 18:31:24 2024 +0300

    resolve conflict

commit c6f6196
Author: jellizaveta <[email protected]>
Date:   Fri Dec 13 18:30:06 2024 +0300

    add check for curly breckets

... and 11 more commits
  • Loading branch information
jellizaveta committed Dec 18, 2024
1 parent 014a3df commit 45c8052
Show file tree
Hide file tree
Showing 8 changed files with 728 additions and 14 deletions.
9 changes: 9 additions & 0 deletions packages/agtree/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@ The format is based on [Keep a Changelog][keepachangelog], and this project adhe
[keepachangelog]: https://keepachangelog.com/en/1.0.0/
[semver]: https://semver.org/spec/v2.0.0.html

## [2.3.0] - 2024-12-18

### Added

- Support for ABP-syntax CSS injection rules [tsurlfilter#143].

[2.3.0]: https://github.com/AdguardTeam/tsurlfilter/releases/tag/agtree-v2.3.0
[tsurlfilter#143]: https://github.com/AdguardTeam/tsurlfilter/issues/143

## [2.2.0] - 2024-11-27

### Removed
Expand Down
60 changes: 53 additions & 7 deletions packages/agtree/src/parser/cosmetic/index.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
/* eslint-disable no-param-reassign */
import { sprintf } from 'sprintf-js';
import { TokenType } from '@adguard/css-tokenizer';

import { CosmeticRuleSeparatorUtils } from '../../utils/cosmetic-rule-separator';
import { AdblockSyntax } from '../../utils/adblockers';
import { DomainListParser } from '../misc/domain-list';
import { ModifierListParser } from '../misc/modifier-list';
import {
ADG_SCRIPTLET_MASK,
CSS_BLOCK_OPEN,
CSS_BLOCK_CLOSE,
CLOSE_PARENTHESIS,
CLOSE_SQUARE_BRACKET,
COLON,
Expand Down Expand Up @@ -56,6 +59,7 @@ import { type InputByteBuffer } from '../../utils/input-byte-buffer';
import { ValueParser } from '../misc/value';
import { isUndefined } from '../../utils/type-guards';
import { BINARY_SCHEMA_VERSION } from '../../utils/binary-schema-version';
import { hasToken } from '../css/has-token';

/**
* Value map for binary serialization. This helps to reduce the size of the serialized data,
Expand Down Expand Up @@ -535,6 +539,37 @@ export class CosmeticRuleParser extends ParserBase {
};
};

/**
 * Tries to parse the rule body as an ABP-syntax CSS injection rule,
 * e.g. `example.com##.foo { display: none; }`.
 *
 * @returns Rule parts marked with ABP syntax and the CSS injection rule type,
 * or `null` if ABP-specific parsing is disabled or the body contains no curly bracket token.
 */
const parseAbpCssInjection = (): Pick<CssInjectionRule, RestProps> | null => {
    // The guards are evaluated left to right and short-circuit:
    //  1. ABP-specific parsing must be enabled;
    //  2. cheap pre-filter: the raw body must contain at least one curly bracket character;
    //  3. token-level confirmation: at least one of those characters must form
    //     an actual curly bracket token according to the CSS tokenizer.
    if (
        !options.parseAbpSpecificRules
        || !(rawBody.includes(CSS_BLOCK_OPEN) || rawBody.includes(CSS_BLOCK_CLOSE))
        || !hasToken(rawBody, new Set([TokenType.OpenCurlyBracket, TokenType.CloseCurlyBracket]))
    ) {
        return null;
    }

    // The body is parsed with the AdGuard CSS injection parser;
    // only the resulting node is labelled as ABP syntax.
    return {
        syntax: AdblockSyntax.Abp,
        type: CosmeticRuleType.CssInjectionRule,
        body: AdgCssInjectionParser.parse(rawBody, options, baseOffset + bodyStart),
    };
};

const parseAbpSnippetInjection = (): Pick<ScriptletInjectionRule, RestProps> | null => {
if (!options.parseAbpSpecificRules) {
throw new AdblockSyntaxError(
Expand Down Expand Up @@ -687,11 +722,23 @@ export class CosmeticRuleParser extends ParserBase {
// the next function is called, and so on.
// If all functions return null, an error should be thrown.
const separatorMap = {
'##': [parseUboHtmlFiltering, parseUboScriptletInjection, parseUboCssInjection, parseElementHiding],
'#@#': [parseUboHtmlFiltering, parseUboScriptletInjection, parseUboCssInjection, parseElementHiding],

'#?#': [parseUboCssInjection, parseElementHiding],
'#@?#': [parseUboCssInjection, parseElementHiding],
'##': [
parseUboHtmlFiltering,
parseUboScriptletInjection,
parseUboCssInjection,
parseAbpCssInjection,
parseElementHiding,
],
'#@#': [
parseUboHtmlFiltering,
parseUboScriptletInjection,
parseUboCssInjection,
parseAbpCssInjection,
parseElementHiding,
],

'#?#': [parseUboCssInjection, parseAbpCssInjection, parseElementHiding],
'#@?#': [parseUboCssInjection, parseAbpCssInjection, parseElementHiding],

'#$#': [parseAdgCssInjection, parseAbpSnippetInjection],
'#@$#': [parseAdgCssInjection, parseAbpSnippetInjection],
Expand Down Expand Up @@ -772,15 +819,14 @@ export class CosmeticRuleParser extends ParserBase {
*/
public static generateBody(node: AnyCosmeticRule): string {
let result = EMPTY;

// Body
switch (node.type) {
case CosmeticRuleType.ElementHidingRule:
result = node.body.selectorList.value;
break;

case CosmeticRuleType.CssInjectionRule:
if (node.syntax === AdblockSyntax.Adg) {
if (node.syntax === AdblockSyntax.Adg || node.syntax === AdblockSyntax.Abp) {
result = AdgCssInjectionParser.generate(node.body);
} else if (node.syntax === AdblockSyntax.Ubo) {
if (node.body.mediaQueryList) {
Expand Down
23 changes: 16 additions & 7 deletions packages/agtree/src/parser/css/css-token-stream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -112,14 +112,23 @@ export class CssTokenStream {
// - end: end index of the token
// - props: additional properties of the token, if any (we don't use it here, this is why we use underscore)
// - balance: balance level of the token
tokenizeBalanced(source, (type, start, end, _, balance) => {
this.tokens.push({
type,
start,
end,
balance,
try {
tokenizeBalanced(source, (type, start, end, _, balance) => {
this.tokens.push({
type,
start,
end,
balance,
});
});
});
} catch (error) {
// If the error is an AdblockSyntaxError, adjust the error positions to the base offset
if (error instanceof AdblockSyntaxError) {
error.start += baseOffset;
error.end += baseOffset;
throw error;
}
}

this.index = 0;

Expand Down
38 changes: 38 additions & 0 deletions packages/agtree/src/parser/css/has-token.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import { type TokenType, tokenizeExtended } from '@adguard/css-tokenizer';

/**
 * Represents an error that occurs when an operation is aborted.
 *
 * In this file it is thrown from the tokenizer callback of `hasToken`
 * to stop tokenization as soon as a matching token is found.
 */
class AbortError extends Error {
    /**
     * Creates an abort error with the fixed message `'Aborted'`.
     */
    constructor() {
        super('Aborted');
        // Without this the instance reports itself as a plain `Error`
        // in logs and stack traces, which hides where it came from.
        this.name = 'AbortError';
    }
}

// TODO: AG-38480 add a stop function to the tokenizers callback and move `hasToken` to CSS Tokenizer as well
/**
 * Tells whether tokenizing the given raw string yields at least one token of the requested types.
 *
 * The tokenizer callback has no way to stop tokenization, so the first match
 * throws an {@link AbortError}, which is caught here and turned into `true`.
 *
 * @param raw - The raw string to be tokenized and checked.
 * @param tokens - A set of token types to look for in the raw string.
 * @returns `true` if any of the specified token types is found, otherwise `false`.
 * @throws Any error raised during tokenization other than the internal abort signal.
 */
export const hasToken = (raw: string, tokens: Set<TokenType>): boolean => {
    // Aborts tokenization on the first token whose type is in the requested set
    const abortOnMatch = (type: TokenType): void => {
        if (tokens.has(type)) {
            throw new AbortError();
        }
    };

    try {
        tokenizeExtended(raw, abortOnMatch);
        // Tokenization ran to completion, so no requested token was seen
        return false;
    } catch (e) {
        // Only the abort signal means "found"; everything else is a real failure
        if (!(e instanceof AbortError)) {
            throw e;
        }
        return true;
    }
};
26 changes: 26 additions & 0 deletions packages/agtree/test/converter/converter.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -658,6 +658,32 @@ describe('Converter integration tests', () => {
expected: ['#?#.banner'],
shouldConvert: false,
},
{
actual: '##.banner { display: none; }',
expected: ['#$#.banner { display: none; }'],
shouldConvert: true,
},
{
actual: '##.banner { remove: true; }',
expected: ['#$?#.banner { remove: true; }'],
shouldConvert: true,
},
// case without spaces in css pseudo property
{
actual: '##.banner {remove:true;}',
expected: ['#$?#.banner { remove: true; }'],
shouldConvert: true,
},
{
actual: '##div[foo="yay{"]',
expected: ['##div[foo="yay{"]'],
shouldConvert: false,
},
{
actual: '##div[foo="yay{"][href="yay}"]',
expected: ['##div[foo="yay{"][href="yay}"]'],
shouldConvert: false,
},
])('should convert \'$actual\' to \'$expected\'', (testData) => {
expect(testData).toBeConvertedProperly(RuleConverter, 'convertToAdg');
});
Expand Down
12 changes: 12 additions & 0 deletions packages/agtree/test/helpers/node-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,18 @@ export class NodeExpectContext {
};
}

/**
 * Builds the one-character-wide range that ends at the end of the actual string,
 * i.e. `[length - 1, length]`.
 *
 * @returns Location range. See {@link Range}.
 */
public getLastSlotRange(): Range {
    const end = this.actual.length;
    const start = end - 1;

    return { start, end };
}

/**
* Converts the given range to a tuple.
*
Expand Down
Loading

0 comments on commit 45c8052

Please sign in to comment.