close
Skip to content
69 changes: 52 additions & 17 deletions harper-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ enum Args {
/// If omitted, `harper-cli` will run every rule.
#[arg(short, long)]
only_lint_with: Option<Vec<String>>,
/// Specify the language.
#[arg(short, long, default_value = "en")]
langiso639: String,
/// Specify the dialect.
#[arg(short, long, default_value = Dialect::American.to_string())]
dialect: Dialect,
Expand All @@ -50,29 +53,50 @@ enum Args {
},
/// Parse a provided document and print the detected symbols.
Parse {
/// Specify the language.
#[arg(short, long, default_value = "en")]
langiso639: String,
/// The file you wish to parse.
file: PathBuf,
},
/// Parse a provided document and show the spans of the detected tokens.
Spans {
/// Specify the language.
#[arg(short, long, default_value = "en")]
langiso639: String,
/// The file you wish to display the spans.
file: PathBuf,
/// Include newlines in the output
#[arg(short, long)]
include_newlines: bool,
},
/// Get the metadata associated with a particular word.
Metadata { word: String },
Metadata {
/// Specify the language.
#[arg(short, long, default_value = "en")]
langiso639: String,
/// The word you wish to get metadata for.
word: String,
},
/// Get all the forms of a word using the affixes.
Forms { line: String },
Forms {
/// Specify the language.
#[arg(short, long, default_value = "en")]
langiso639: String,
/// The word you wish to get forms for.
line: String,
},
/// Emit a decompressed, line-separated list of the words in Harper's dictionary.
Words,
Words { langiso639: String },
/// Summarize a lint record
SummarizeLintRecord { file: PathBuf },
/// Print the default config with descriptions.
Config,
Config { langiso639: String },
/// Print a list of all the words in a document, sorted by frequency.
MineWords {
/// Specify the language.
#[arg(short, long, default_value = "en")]
langiso639: String,
/// The document to mine words from.
file: PathBuf,
},
Expand All @@ -81,27 +105,28 @@ enum Args {
fn main() -> anyhow::Result<()> {
let args = Args::parse();
let markdown_options = MarkdownOptions::default();
let dictionary = FstDictionary::curated();

match args {
Args::Lint {
file,
count,
only_lint_with,
langiso639,
dialect,
user_dict_path,
file_dict_path,
} => {
let mut merged_dict = MergedDictionary::new();
let dictionary = FstDictionary::curated(&langiso639);
let mut merged_dict = MergedDictionary::new(&langiso639);
merged_dict.add_dictionary(dictionary);

match load_dict(&user_dict_path) {
match load_dict(&user_dict_path, &langiso639) {
Ok(user_dict) => merged_dict.add_dictionary(Arc::new(user_dict)),
Err(err) => println!("{}: {}", user_dict_path.display(), err),
}

let file_dict_path = file_dict_path.join(file_dict_name(&file));
match load_dict(&file_dict_path) {
match load_dict(&file_dict_path, &langiso639) {
Ok(file_dict) => merged_dict.add_dictionary(Arc::new(file_dict)),
Err(err) => println!("{}: {}", file_dict_path.display(), err),
}
Expand Down Expand Up @@ -154,7 +179,8 @@ fn main() -> anyhow::Result<()> {

process::exit(1)
}
Args::Parse { file } => {
Args::Parse { langiso639, file } => {
let dictionary = FstDictionary::curated(&langiso639);
let (doc, _) = load_file(&file, markdown_options, &dictionary)?;

for token in doc.tokens() {
Expand All @@ -165,9 +191,11 @@ fn main() -> anyhow::Result<()> {
Ok(())
}
Args::Spans {
langiso639,
file,
include_newlines,
} => {
let dictionary = FstDictionary::curated(&langiso639);
let (doc, source) = load_file(&file, markdown_options, &dictionary)?;

let primary_color = Color::Blue;
Expand Down Expand Up @@ -209,7 +237,8 @@ fn main() -> anyhow::Result<()> {

Ok(())
}
Args::Words => {
Args::Words { langiso639 } => {
let dictionary = FstDictionary::curated(&langiso639);
let mut word_str = String::new();

for word in dictionary.words_iter() {
Expand All @@ -221,7 +250,8 @@ fn main() -> anyhow::Result<()> {

Ok(())
}
Args::Metadata { word } => {
Args::Metadata { langiso639, word } => {
let dictionary = FstDictionary::curated(&langiso639);
let metadata = dictionary.get_word_metadata_str(&word);
let json = serde_json::to_string_pretty(&metadata).unwrap();

Expand All @@ -239,7 +269,7 @@ fn main() -> anyhow::Result<()> {

Ok(())
}
Args::Forms { line } => {
Args::Forms { langiso639, line } => {
let (word, annot) = line_to_parts(&line);

let curated_word_list = include_str!("../../harper-core/dictionary.dict");
Expand Down Expand Up @@ -290,12 +320,13 @@ fn main() -> anyhow::Result<()> {

if let Some((dict_word, dict_annot)) = &entry_in_dict {
println!("Old, from the dictionary:");
print_word_derivations(dict_word, dict_annot, &FstDictionary::curated());
print_word_derivations(dict_word, dict_annot, &FstDictionary::curated(&langiso639));
};

if !annot.is_empty() {
let rune_words = format!("1\n{line}");
let dict = MutableDictionary::from_rune_files(
&langiso639,
&rune_words,
include_str!("../../harper-core/affixes.json"),
)?;
Expand All @@ -306,13 +337,16 @@ fn main() -> anyhow::Result<()> {

Ok(())
}
Args::Config => {
Args::Config {
langiso639: language,
} => {
#[derive(Serialize)]
struct Config {
default_value: bool,
description: String,
}

let dictionary = FstDictionary::curated(&language);
let linter = LintGroup::new_curated(dictionary, Dialect::American);

let default_config: HashMap<String, bool> =
Expand All @@ -334,7 +368,8 @@ fn main() -> anyhow::Result<()> {

Ok(())
}
Args::MineWords { file } => {
Args::MineWords { langiso639, file } => {
let dictionary = FstDictionary::curated(&langiso639);
let (doc, _source) = load_file(&file, MarkdownOptions::default(), &dictionary)?;

let mut words = HashMap::new();
Expand Down Expand Up @@ -415,10 +450,10 @@ fn print_word_derivations(word: &str, annot: &str, dictionary: &impl Dictionary)
}

/// Sync version of harper-ls/src/dictionary_io@load_dict
fn load_dict(path: &Path) -> anyhow::Result<MutableDictionary> {
fn load_dict(path: &Path, langiso639: &str) -> anyhow::Result<MutableDictionary> {
let str = fs::read_to_string(path)?;

let mut dict = MutableDictionary::new();
let mut dict = MutableDictionary::new(langiso639);
dict.extend_words(
str.lines()
.map(|l| (l.chars().collect::<Vec<_>>(), WordMetadata::default())),
Expand Down
6 changes: 3 additions & 3 deletions harper-comments/src/comment_parsers/jsdoc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,15 +171,15 @@ mod tests {
let source = "/** This should _not_cause an infinite loop: {@ */";
let parser =
CommentParser::new_from_language_id("javascript", MarkdownOptions::default()).unwrap();
Document::new_curated(source, &parser);
Document::new_curated(source, &parser, "en");
}

#[test]
fn handles_inline_link() {
let source = "/** See {@link MyClass} and [MyClass's foo property]{@link MyClass#foo}. */";
let parser =
CommentParser::new_from_language_id("javascript", MarkdownOptions::default()).unwrap();
let document = Document::new_curated(source, &parser);
let document = Document::new_curated(source, &parser, "en");

assert!(matches!(
document
Expand Down Expand Up @@ -224,7 +224,7 @@ mod tests {
let source = "/** @class Circle representing a circle. */";
let parser =
CommentParser::new_from_language_id("javascript", MarkdownOptions::default()).unwrap();
let document = Document::new_curated(source, &parser);
let document = Document::new_curated(source, &parser, "en");

assert!(
document.tokens().all(|t| t.kind.is_unlintable()
Expand Down
5 changes: 4 additions & 1 deletion harper-comments/src/masker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@ pub struct CommentMasker {

impl CommentMasker {
/// Delegates to the wrapped masker's `create_ident_dict` for `source`.
///
/// The inner call takes a language code in addition to the source text;
/// this wrapper keeps its original one-argument signature and always
/// passes `"en"`.
///
/// NOTE(review): the language is hard-coded here. Presumably it should be
/// supplied by the caller once `CommentMasker` carries a language — confirm.
///
/// Returns whatever the inner masker returns, unchanged.
pub fn create_ident_dict(&self, source: &[char]) -> Option<MutableDictionary> {
    self.inner.create_ident_dict("en", source)
}

pub fn new(
Expand Down
2 changes: 1 addition & 1 deletion harper-comments/tests/language_support.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ macro_rules! create_test {
);

let parser = CommentParser::new_from_filename(Path::new(filename), MarkdownOptions::default()).unwrap();
let dict = FstDictionary::curated();
let dict = FstDictionary::curated("en");
let document = Document::new(&source, &parser, &dict);

let mut linter = LintGroup::new_curated(dict, Dialect::American);
Expand Down
60 changes: 60 additions & 0 deletions harper-core/affixes-es.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
{
"affixes": {
"a": {
"#": "adjective property",
"kind": "property",
"cross_product": true,
"replacements": [],
"target_metadata": {},
"base_metadata": {
"adjective": {}
}
},
"f": {
"#": "feminine property",
"kind": "property",
"cross_product": true,
"replacements": [],
"target_metadata": {},
"base_metadata": {
"noun": {
"//": "should be an enum rather than a boolean",
"is_feminine": true
}
}
},
"m": {
"#": "masculine property",
"kind": "property",
"cross_product": true,
"replacements": [],
"target_metadata": {},
"base_metadata": {
"noun": {
"//": "should be an enum rather than a boolean",
"is_masculine": true
}
}
},
"n": {
"#": "noun property",
"kind": "property",
"cross_product": true,
"replacements": [],
"target_metadata": {},
"base_metadata": {
"noun": {}
}
},
"v": {
"#": "verb property",
"kind": "property",
"cross_product": true,
"replacements": [],
"target_metadata": {},
"base_metadata": {
"verb": {}
}
}
}
}
8 changes: 4 additions & 4 deletions harper-core/benches/parse_demo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@ static ESSAY: &str = include_str!("./essay.md");

/// Registers the `parse_essay` benchmark with Criterion.
///
/// Each iteration builds a `Document` from the bundled `ESSAY` text through
/// `Document::new_markdown_default_curated`, passing `"en"` as the language
/// code. The input goes through `black_box` so the optimizer cannot treat
/// the essay as a compile-time constant and fold the work away.
fn parse_essay(c: &mut Criterion) {
    c.bench_function("parse_essay", |b| {
        b.iter(|| Document::new_markdown_default_curated(black_box(ESSAY), "en"));
    });
}

fn lint_essay(c: &mut Criterion) {
let dictionary = FstDictionary::curated();
let dictionary = FstDictionary::curated("en");
let mut lint_set = LintGroup::new_curated(dictionary, Dialect::American);
let document = Document::new_markdown_default_curated(black_box(ESSAY));
let document = Document::new_markdown_default_curated(black_box(ESSAY), "en");

c.bench_function("lint_essay", |b| {
b.iter(|| lint_set.lint(&document));
Expand All @@ -23,7 +23,7 @@ fn lint_essay(c: &mut Criterion) {
fn lint_essay_uncached(c: &mut Criterion) {
c.bench_function("lint_essay_uncached", |b| {
b.iter(|| {
let dictionary = FstDictionary::curated();
let dictionary = FstDictionary::curated("en");
let mut lint_set = LintGroup::new_curated(dictionary.clone(), Dialect::American);
let document = Document::new_markdown_default(black_box(ESSAY), &dictionary);
lint_set.lint(&document)
Expand Down
Loading
Loading