Skip to content

Commit

Permalink
[refurb] Avoid triggering hardcoded-string-charset for reordered …
Browse files Browse the repository at this point in the history
…sets (#14233)

## Summary

It's only safe to enforce the `x in "1234567890"` case if `x` is exactly
one character, since the set on the right has been reordered as compared
to `string.digits`. We can't know if `x` is exactly one character unless
it's a literal. And if it's a literal, well, it's kind of silly code in
the first place?

Closes #13802.
  • Loading branch information
charliermarsh authored Nov 9, 2024
1 parent 1279c20 commit 555a5c9
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 292 deletions.
23 changes: 2 additions & 21 deletions crates/ruff_linter/resources/test/fixtures/refurb/FURB156.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
_ = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
_ = " \t\n\r\v\f"

_ = "" in "1234567890"
_ = "" in "12345670"
_ = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c'
_ = (
'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'
Expand All @@ -19,23 +17,6 @@
_ = id("0123"
"4567"
"89")
_ = "" in ("123"
"456"
"789"
"0")

_ = "" in ( # comment
"123"
"456"
"789"
"0")


_ = "" in (
"123"
"456" # inline comment
"789"
"0")

_ = (
"0123456789"
Expand All @@ -46,8 +27,8 @@
# with comment
).capitalize()

# Ok
# OK

_ = "1234567890"
_ = "1234"
_ = "" in "1234"
_ = "12" in "12345670"
3 changes: 0 additions & 3 deletions crates/ruff_linter/src/checkers/ast/analyze/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1371,9 +1371,6 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
if checker.enabled(Rule::SingleItemMembershipTest) {
refurb::rules::single_item_membership_test(checker, expr, left, ops, comparators);
}
if checker.enabled(Rule::HardcodedStringCharset) {
refurb::rules::hardcoded_string_charset_comparison(checker, compare);
}
}
Expr::NumberLiteral(number_literal @ ast::ExprNumberLiteral { .. }) => {
if checker.source_type.is_stub() && checker.enabled(Rule::NumericLiteralTooLong) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::checkers::ast::Checker;
use crate::importer::ImportRequest;
use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::{CmpOp, Expr, ExprCompare, ExprStringLiteral};
use ruff_python_ast::ExprStringLiteral;
use ruff_text_size::TextRange;

/// ## What it does
Expand Down Expand Up @@ -44,14 +44,22 @@ impl AlwaysFixableViolation for HardcodedStringCharset {
}
}

/// FURB156
pub(crate) fn hardcoded_string_charset_literal(checker: &mut Checker, expr: &ExprStringLiteral) {
if let Some(charset) = check_charset_exact(expr.value.to_str().as_bytes()) {
push_diagnostic(checker, expr.range, charset);
}
}

#[derive(Debug, Copy, Clone, Eq, PartialEq)]
struct NamedCharset {
name: &'static str,
bytes: &'static [u8],
ascii_char_set: AsciiCharSet,
}

/// Represents the set of ascii characters in form of a bitset.
#[derive(Copy, Clone, Eq, PartialEq)]
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
struct AsciiCharSet(u128);

impl AsciiCharSet {
Expand Down Expand Up @@ -108,14 +116,6 @@ const KNOWN_NAMED_CHARSETS: [NamedCharset; 9] = [
NamedCharset::new("whitespace", b" \t\n\r\x0b\x0c"),
];

fn check_charset_as_set(bytes: &[u8]) -> Option<&NamedCharset> {
let ascii_char_set = AsciiCharSet::from_bytes(bytes)?;

KNOWN_NAMED_CHARSETS
.iter()
.find(|&charset| charset.ascii_char_set == ascii_char_set)
}

fn check_charset_exact(bytes: &[u8]) -> Option<&NamedCharset> {
KNOWN_NAMED_CHARSETS
.iter()
Expand All @@ -138,34 +138,3 @@ fn push_diagnostic(checker: &mut Checker, range: TextRange, charset: &NamedChars
});
checker.diagnostics.push(diagnostic);
}

/// FURB156
pub(crate) fn hardcoded_string_charset_comparison(checker: &mut Checker, compare: &ExprCompare) {
let (
[CmpOp::In | CmpOp::NotIn],
[Expr::StringLiteral(string_literal @ ExprStringLiteral { value, .. })],
) = (compare.ops.as_ref(), compare.comparators.as_ref())
else {
return;
};

let bytes = value.to_str().as_bytes();

let Some(charset) = check_charset_as_set(bytes) else {
return;
};

// In this case the diagnostic will be emitted via string_literal check.
if charset.bytes == bytes {
return;
}

push_diagnostic(checker, string_literal.range, charset);
}

/// FURB156
pub(crate) fn hardcoded_string_charset_literal(checker: &mut Checker, expr: &ExprStringLiteral) {
if let Some(charset) = check_charset_exact(expr.value.to_str().as_bytes()) {
push_diagnostic(checker, expr.range, charset);
}
}
Loading

0 comments on commit 555a5c9

Please sign in to comment.