various fixes

This commit is contained in:
2025-07-13 22:04:16 +02:00
parent 768bab6d57
commit 73ba03a816
4 changed files with 49 additions and 21 deletions

View File

@@ -1,10 +1,10 @@
pub mod utils { pub mod utils {
use std::process::exit; use fuzzt::algorithms::{levenshtein, sorensen_dice};
use regex::Regex;
use std::fs::File; use std::fs::File;
use std::io::{self, BufRead, Result, Write}; use std::io::{self, BufRead, Result, Write};
use std::path::Path; use std::path::Path;
use fuzzt::algorithms::{ levenshtein, sorensen_dice}; use std::process::exit;
use regex::Regex;
pub fn correct(word: String, list_path: String) -> String { pub fn correct(word: String, list_path: String) -> String {
let mut list_correct_words: Vec<String> = Vec::new(); let mut list_correct_words: Vec<String> = Vec::new();
@@ -16,7 +16,7 @@ pub mod utils {
let list_iter = list_correct_words.iter(); let list_iter = list_correct_words.iter();
let mut has_match: bool = false; let mut has_match: bool = false;
for correct_word in list_iter.clone() { for correct_word in list_iter.clone() {
if word.eq(correct_word) { if word.eq(correct_word) || word.to_lowercase().eq(correct_word) {
has_match = true; has_match = true;
} }
} }
@@ -58,14 +58,15 @@ pub mod utils {
let line_iter = line.split_whitespace(); let line_iter = line.split_whitespace();
let mut words: Vec<String> = Vec::new(); let mut words: Vec<String> = Vec::new();
for word in line_iter { for word in line_iter {
if re.is_match(word) { if re.is_match(word) {
let mut word_buffer: String; let mut word_buffer: String;
word_buffer = word.replace(&['(',')', '/', '\"', '\\', '<', '>', '*'], ""); word_buffer =
word.replace(&['(', '/', '\"', '\\', '<', '>', '»'], "");
word_buffer = word_buffer.replace("ſ", "s"); word_buffer = word_buffer.replace("ſ", "s");
words.push(word_buffer); words.push(word_buffer);
} else { } else {
words.push(String::from(word)); let push_string = word.replace("ſ", "s");
words.push(String::from(push_string));
} }
} }
input_words_by_line.push(words); input_words_by_line.push(words);
@@ -84,26 +85,34 @@ pub mod utils {
special_chars_back = &captures["specialback"]; special_chars_back = &captures["specialback"];
text = &captures["text"]; text = &captures["text"];
let mut out_str: String = String::from(""); let mut out_str: String = String::from("");
out_str = out_str + special_chars_front + correct(String::from(text), list_path.clone()).as_str() + special_chars_back; out_str = out_str
+ special_chars_front
+ correct(String::from(text), list_path.clone()).as_str()
+ special_chars_back;
output.push_str(out_str.as_str()); output.push_str(out_str.as_str());
output.push_str(" ");
} else {
output.push_str(correct(String::from(word), list_path.clone()).as_str());
output.push_str(" ");
} }
} }
output.push('\n'); output.push('\n');
} }
let mut output_file: File = match File::create_new(output_path) { let mut output_file: File = match File::create_new(output_path) {
Ok(f) => f, Ok(f) => f,
Err(e) => panic!("Cannot write output file: {}", e) Err(e) => panic!("Cannot write output file: {}", e),
}; };
match write!(output_file, "{}", output) { match write!(output_file, "{}", output) {
Ok(_) => exit(1), Ok(_) => exit(1),
Err(e) => panic!("Cannot write output file: {}", e) Err(e) => panic!("Cannot write output file: {}", e),
}; };
} }
fn read_lines<P>(filename: P) -> io::Result<io::Lines<io::BufReader<File>>> fn read_lines<P>(filename: P) -> io::Result<io::Lines<io::BufReader<File>>>
where P: AsRef<Path>, { where
P: AsRef<Path>,
{
let file = File::open(filename)?; let file = File::open(filename)?;
Ok(io::BufReader::new(file).lines()) Ok(io::BufReader::new(file).lines())
} }
} }

View File

@@ -20,6 +20,7 @@ enum Commands {
struct WordArgs { struct WordArgs {
#[arg(short,long)] #[arg(short,long)]
input: String, input: String,
#[arg(short,long)]
list_path: String list_path: String
} }
@@ -27,7 +28,9 @@ struct WordArgs {
struct FileArgs { struct FileArgs {
#[arg(short,long)] #[arg(short,long)]
input: String, input: String,
#[arg(short,long)]
list_path: String, list_path: String,
#[arg(short,long)]
output: String output: String
} }

View File

@@ -1908814,3 +1908814,19 @@ zytotoxischen
zytotoxischer zytotoxischer
zytotoxisches zytotoxisches
zzgl zzgl
Ein
Vorrede
Thuet
Nicht-Seyn
thut
Als
daß
muß
Fuß
sey
bringet
Bringet
Schaale
Krummacher
theurer
seyn