Skip to content

Commit

Permalink
test: make sure linter descriptions are grammatically correct
Browse files Browse the repository at this point in the history
Which helped me discover bugs in the `CompoundWords` linter.
  • Loading branch information
elijah-potter committed Jan 3, 2025
1 parent f70f289 commit 1e0b5b0
Show file tree
Hide file tree
Showing 16 changed files with 95 additions and 36 deletions.
2 changes: 1 addition & 1 deletion demo.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ checkers don't cut it. That s where Harper comes in handy.
Harper is an language checker for developers. it can detect
improper capitalization and misspellled words,
as well as a number of other issues.
Like if you break up words you shouldn't.
Like if you break up words you shoul dn't.

Harper works everywhere, even offline. Since you r data
never leaves your device, you don't ned to worry aout us
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ public static void main(String[] args) {
/**
* This doc has a link in it: {@link this sould b ignor} but not tis
*
* @param name this is an other test.
* @param name this is anoher test.
*/
public static void greet(String name) {
System.out.println("Hello " + name + ".");
Expand Down
3 changes: 1 addition & 2 deletions harper-core/dictionary.dict
Original file line number Diff line number Diff line change
Expand Up @@ -28344,7 +28344,7 @@ inimitably/
iniquitous/5Y
iniquity/1SM
initial/514SGMDY
initialism/1
initialism/1MS
initialization/1
initialize/4DSG
initialized/4AU
Expand Down Expand Up @@ -49636,7 +49636,6 @@ scatterplot/14SMG
Wikilink/MS1
stacktrace/SM1
scrollbar/1SM
break-up/1SM
sweetgrass/1SM
PowerShell/SM
WebSocket/SM
Expand Down
27 changes: 27 additions & 0 deletions harper-core/src/lexing/hostname.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,30 @@
use crate::TokenKind;

use super::FoundToken;

/// Attempt to lex a hostname token from the start of `source`.
///
/// The candidate length comes from [`lex_hostname`]. The candidate is rejected
/// (yielding `None`) when it is at most one character long, when no `.` occurs
/// strictly between its first and last character, or when its last character
/// is a `.`. Otherwise a [`FoundToken`] of kind [`TokenKind::Hostname`] is
/// returned whose `next_index` is the candidate length.
pub fn lex_hostname_token(source: &[char]) -> Option<FoundToken> {
    let len = lex_hostname(source)?;

    // Too short to be a hostname; it might just be a word.
    if len < 2 {
        return None;
    }

    // Look for a dot strictly inside the candidate (first and last characters excluded).
    let interior = source.get(1..len - 1)?;
    let has_inner_dot = interior.iter().any(|&c| c == '.');

    // A candidate whose final character is a dot is not accepted.
    let ends_with_dot = matches!(source.get(len - 1), Some('.'));

    (has_inner_dot && !ends_with_dot).then_some(FoundToken {
        next_index: len,
        token: TokenKind::Hostname,
    })
}

pub fn lex_hostname(source: &[char]) -> Option<usize> {
let mut passed_chars = 0;

Expand Down
16 changes: 16 additions & 0 deletions harper-core/src/lexing/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ mod email_address;
mod hostname;
mod url;

use hostname::lex_hostname_token;
use url::lex_url;

use self::email_address::lex_email_address;
Expand All @@ -27,6 +28,7 @@ pub fn lex_token(source: &[char]) -> Option<FoundToken> {
lex_number,
lex_url,
lex_email_address,
lex_hostname_token,
lex_word,
lex_catch,
];
Expand Down Expand Up @@ -161,11 +163,25 @@ fn lex_catch(_source: &[char]) -> Option<FoundToken> {

#[cfg(test)]
mod tests {
    use super::{lex_token, lex_word, FoundToken, TokenKind};

    /// A lone CJK character must not be picked up by the word lexer.
    #[test]
    fn lexes_cjk_as_unlintable() {
        let chars: Vec<char> = "世".chars().collect();
        assert!(lex_word(&chars).is_none());
    }

    /// A bare domain such as `YouTube.com` must come out of the top-level
    /// lexer as a hostname token.
    #[test]
    fn lexes_youtube_as_hostname() {
        let chars: Vec<char> = "YouTube.com".chars().collect();
        let lexed = lex_token(&chars);

        assert!(matches!(
            lexed,
            Some(FoundToken {
                token: TokenKind::Hostname,
                ..
            })
        ));
    }
}
2 changes: 1 addition & 1 deletion harper-core/src/linting/an_a.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ impl Linter for AnA {
}

fn description(&self) -> &'static str {
"A rule that looks for incorrect indefinite articles. For example, \"this is an mule\" would be flagged as incorrect."
"A rule that looks for incorrect indefinite articles. For example, `this is an mule` would be flagged as incorrect."
}
}

Expand Down
27 changes: 5 additions & 22 deletions harper-core/src/linting/compound_words.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,18 +51,10 @@ impl Linter for CompoundWords {
merged_word.extend_from_slice(b_chars);

// Check for closed compound words
if self.dict.contains_word(&merged_word) {
potential_compounds.push(merged_word.clone());
}

// Check for hyphenated compound words
merged_word.clear();
merged_word.extend_from_slice(a_chars);
merged_word.push('-');
merged_word.extend_from_slice(b_chars);

// Check for closed compound words
if self.dict.contains_word(&merged_word) {
if self.dict.contains_word(&merged_word)
&& !a.kind.is_common_word()
&& !b.kind.is_common_word()
{
potential_compounds.push(merged_word.clone());
}

Expand Down Expand Up @@ -134,15 +126,6 @@ mod tests {
);
}

#[test]
fn makeup() {
assert_lint_count(
"She spent a lot of time doing her make up this morning.",
CompoundWords::default(),
1,
);
}

#[test]
fn birthday() {
assert_lint_count(
Expand Down Expand Up @@ -175,7 +158,7 @@ mod tests {
assert_suggestion_count(
"Like if you break up words you shouldn't.",
CompoundWords::default(),
2,
0,
);
}
}
2 changes: 1 addition & 1 deletion harper-core/src/linting/correct_number_suffix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ impl Linter for CorrectNumberSuffix {
}

fn description(&self) -> &'static str {
"When making quick edits, it is common for authors to change the value of a number without changing its suffix. This rule looks for these cases, for example: \"2st\"."
"When making quick edits, it is common for authors to change the value of a number without changing its suffix. This rule looks for these cases, for example: `2st`."
}
}

Expand Down
2 changes: 1 addition & 1 deletion harper-core/src/linting/linking_verbs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ impl Linter for LinkingVerbs {
}

fn description(&self) -> &'static str {
"Linking verbs connect nouns to other ideas. Make sure you do not accidentaly link words that aren't nouns."
"Linking verbs connect nouns to other ideas. Make sure you do not accidentally link words that aren't nouns."
}
}

Expand Down
31 changes: 28 additions & 3 deletions harper-core/src/linting/lint_group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@ macro_rules! create_lint_group_config {
pub spell_check: &'a str
}


impl<'a> LintGroupDescriptions<'a> {
/// Create a [`Vec`] containing the key-value pairs of this struct.
///
/// Consumes `self`. One `(key, value)` pair is emitted per `$linter`
/// repetition of the surrounding macro: the key is the stringified
/// `[<$linter:snake>]` identifier and the value is the field of the same
/// name. A final `("spell_check", self.spell_check)` pair is always
/// appended last.
pub fn to_vec_pairs(self) -> Vec<(&'static str, &'a str)>{
// NOTE(review): the `[<...>]` syntax presumably relies on an enclosing `paste!` invocation that is not visible in this hunk — confirm.
vec![$((stringify!([<$linter:snake>]), self.[<$linter:snake>],),)* ("spell_check", self.spell_check)]
}
}

#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)]
pub struct LintGroupConfig {
$(
Expand Down Expand Up @@ -175,7 +183,7 @@ create_lint_group_config!(
AppleNames => true,
AzureNames => true,
CompoundWords => true,
PluralConjugate => true
PluralConjugate => false
);

impl<T: Dictionary + Default> Default for LintGroup<T> {
Expand All @@ -186,13 +194,30 @@ impl<T: Dictionary + Default> Default for LintGroup<T> {

#[cfg(test)]
mod tests {
use crate::FullDictionary;
use crate::{linting::Linter, Document, FstDictionary, FullDictionary};

use super::LintGroup;
use super::{LintGroup, LintGroupConfig};

/// Smoke test: asking a default-constructed group for all of its
/// descriptions must complete without panicking.
#[test]
fn can_get_all_descriptions() {
    let linters: LintGroup<FullDictionary> = LintGroup::default();
    linters.all_descriptions();
}

/// Every linter description, parsed as Markdown, must itself produce no
/// lints when run through the default lint group.
#[test]
fn lint_descriptions_are_clean() {
    let mut group = LintGroup::new(LintGroupConfig::default(), FstDictionary::curated());

    // Copy every pair into owned strings before linting.
    // NOTE(review): presumably needed because the descriptions borrow from
    // `group`, which is used mutably below — confirm.
    let mut owned_pairs = Vec::new();
    for (name, text) in group.all_descriptions().to_vec_pairs() {
        owned_pairs.push((name.to_string(), text.to_string()));
    }

    for (key, value) in owned_pairs {
        let doc = Document::new_markdown_curated(&value);
        // Printed ahead of the assertion so a failing run shows which description tripped it.
        eprintln!("{key}: {value}");
        assert!(group.lint(&doc).is_empty());
    }
}
}
3 changes: 1 addition & 2 deletions harper-core/src/linting/long_sentences.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ impl Linter for LongSentences {
}

fn description(&self) -> &'static str {
"This rule looks for run-on sentences, which can make your work harder to grok.
"
"This rule looks for run-on sentences, which can make your work harder to grok."
}
}
2 changes: 1 addition & 1 deletion harper-core/src/linting/that_which.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ impl PatternLinter for ThatWhich {
}

fn description(&self) -> &'static str {
"Repeating the word \"that\" twice is often redundent. \"That which\" is easier to read."
"Repeating the word \"that\" twice is often redundant. `That which` is easier to read."
}
}

Expand Down
8 changes: 8 additions & 0 deletions harper-core/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,14 @@ impl TokenKind {
metadata.is_not_plural_noun()
}

/// Report whether this token counts as a common word.
///
/// For [`TokenKind::Word`] this is the `common` flag stored in the word's
/// metadata. Every other token kind reports `true`.
pub fn is_common_word(&self) -> bool {
    match self {
        TokenKind::Word(metadata) => metadata.common,
        _ => true,
    }
}

pub fn is_plural_noun(&self) -> bool {
let TokenKind::Word(metadata) = self else {
return false;
Expand Down
1 change: 1 addition & 0 deletions harper-core/tests/run_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,4 @@ create_test!(chinese_lorem_ipsum.md, 2);
create_test!(obsidian_links.md, 2);
create_test!(issue_267.md, 0);
create_test!(proper_noun_capitalization.md, 2);
create_test!(amazon_hostname.md, 0);
1 change: 1 addition & 0 deletions harper-core/tests/test_sources/amazon_hostname.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is a test of whether Amazon.com is considered a URI.
2 changes: 1 addition & 1 deletion packages/harper.js/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"scripts": {
"dev": "vite",
"build": "tsc && vite build",
"test": "vitest run --browser firefox && vitest run --browser chromium"
"test": "vitest run --browser chromium && vitest run --browser firefox"
},
"devDependencies": {
"wasm": "link:../../harper-wasm/pkg",
Expand Down

0 comments on commit 1e0b5b0

Please sign in to comment.