From 73ba03a816d721c520eeae49067eeecbdfbebda5 Mon Sep 17 00:00:00 2001 From: Nihil Carcosa Date: Sun, 13 Jul 2025 22:04:16 +0200 Subject: [PATCH] various fixes --- input.txt | 4 ++-- src/lib.rs | 47 +++++++++++++++++++++++++++------------------ src/main.rs | 3 +++ wordlist-german.txt | 16 +++++++++++++++ 4 files changed, 49 insertions(+), 21 deletions(-) diff --git a/input.txt b/input.txt index 3e4db06..7b1f5e9 100644 --- a/input.txt +++ b/input.txt @@ -15,8 +15,8 @@ daß dieſes prophetiſche Wunder ſymboliſche Bedeutung hat und geiſtlich zu allen Zeiten in der Gemeinde wiederholt werden muß. Wenigſtens müſſen wir beſtändig um die geiſtliche Erneuerung deſſelben flehen. Wir ſehnen uns danach, wir erflehen ſie. Auch -unſere Brunnen ſind abgeſtanden , faul, vergiftet und hauchen -den Tod aus , der nicht eine Stadt und Gegend , ſondern eine +unſere Brunnen ſind abgeſtanden, faul, vergiftet und hauchen +den Tod aus, der nicht eine Stadt und Gegend, ſondern eine Welt zu verderben droht, und in unſern Tagen ſchreklicher als jemals vorher wüthet. Jene vergifteten Brunnen ſind die Wiſ- ſenſc String { let mut list_correct_words: Vec = Vec::new(); if let Ok(lines) = read_lines(list_path) { @@ -16,7 +16,7 @@ pub mod utils { let list_iter = list_correct_words.iter(); let mut has_match: bool = false; for correct_word in list_iter.clone() { - if word.eq(correct_word) { + if word.eq(correct_word) || word.to_lowercase().eq(correct_word) { has_match = true; } } @@ -43,9 +43,9 @@ pub mod utils { return String::from(closest); } } - - pub fn correct_file (file_path: String, list_path: String, output_path: String) { - let re : Regex = Regex::new(r"[\w]+[\W]+").unwrap(); + + pub fn correct_file(file_path: String, list_path: String, output_path: String) { + let re: Regex = Regex::new(r"[\w]+[\W]+").unwrap(); let re2: Regex = Regex::new(r"(?P[\W&&[^\s]&&[^\n]]*)(?P[\w]+)(?P[\W&&[^\s]&&[^\n]]*)").unwrap(); let mut input_file_lines: Vec = Vec::new(); if let Ok(lines) = read_lines(file_path) { @@ -58,14 +58,15 @@ pub mod utils { let line_iter = line.split_whitespace(); let mut words: Vec = Vec::new(); for word in line_iter { - if re.is_match(word) { let mut word_buffer: String; - word_buffer = word.replace(&['(',')', '/', '\"', '\\', '<', '>', '*'], ""); + word_buffer = + word.replace(&['(', '/', '\"', '\\', '<', '>', '»'], ""); word_buffer = word_buffer.replace("ſ", "s"); words.push(word_buffer); } else { - words.push(String::from(word)); + let push_string = word.replace("ſ", "s"); + words.push(String::from(push_string)); } } input_words_by_line.push(words); @@ -83,27 +84,35 @@ pub mod utils { special_chars_front = &captures["specialfront"]; special_chars_back = &captures["specialback"]; text = &captures["text"]; - let mut out_str: String = String::from(""); - out_str = out_str + special_chars_front + correct(String::from(text), list_path.clone()).as_str() + special_chars_back; + let mut out_str: String = String::from(""); + out_str = out_str + + special_chars_front + + correct(String::from(text), list_path.clone()).as_str() + + special_chars_back; output.push_str(out_str.as_str()); + output.push_str(" "); + } else { + output.push_str(correct(String::from(word), list_path.clone()).as_str()); + output.push_str(" "); } } output.push('\n'); } let mut output_file: File = match File::create_new(output_path) { Ok(f) => f, - Err(e) => panic!("Cannot write output file: {}", e) + Err(e) => panic!("Cannot write output file: {}", e), }; - match write!(output_file, "{}", output){ + match write!(output_file, "{}", output) { Ok(_) => exit(1), - Err(e) => panic!("Cannot write output file: {}", e) + Err(e) => panic!("Cannot write output file: {}", e), }; } - + fn read_lines

(filename: P) -> io::Result>> - where P: AsRef, { + where + P: AsRef, + { let file = File::open(filename)?; Ok(io::BufReader::new(file).lines()) } } - diff --git a/src/main.rs b/src/main.rs index 135eed8..a82cac2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -20,6 +20,7 @@ enum Commands { struct WordArgs { #[arg(short,long)] input: String, + #[arg(short,long)] list_path: String } @@ -27,7 +28,9 @@ struct WordArgs { struct FileArgs { #[arg(short,long)] input: String, + #[arg(short,long)] list_path: String, + #[arg(short,long)] output: String } diff --git a/wordlist-german.txt b/wordlist-german.txt index adeac44..69e14db 100644 --- a/wordlist-german.txt +++ b/wordlist-german.txt @@ -1908814,3 +1908814,19 @@ zytotoxischen zytotoxischer zytotoxisches zzgl +Ein +Vorrede +Thuet +Nicht-Seyn +thut +Als +daß +muß +Fuß +sey +bringet +Bringet +Schaale +Krummacher +theurer +seyn