all features implemented

This commit is contained in:
2025-07-13 21:24:18 +02:00
parent a6a1c82bc6
commit e1e9b31f00

View File

@@ -1,6 +1,7 @@
pub mod utils { pub mod utils {
use std::process::exit;
use std::fs::File; use std::fs::File;
use std::io::{self, BufRead, Result}; use std::io::{self, BufRead, Result, Write};
use std::path::Path; use std::path::Path;
use fuzzt::algorithms::{ levenshtein, sorensen_dice}; use fuzzt::algorithms::{ levenshtein, sorensen_dice};
use regex::Regex; use regex::Regex;
@@ -45,13 +46,14 @@ pub mod utils {
pub fn correct_file (file_path: String, list_path: String, output_path: String) { pub fn correct_file (file_path: String, list_path: String, output_path: String) {
let re : Regex = Regex::new(r"[\w]+[\W]+").unwrap(); let re : Regex = Regex::new(r"[\w]+[\W]+").unwrap();
let re2: Regex = Regex::new(r"(?P<specialfront>[\W&&[^\s]&&[^\n]]*)(?P<text>[\w]+)(?P<specialback>[\W&&[^\s]&&[^\n]]*)").unwrap();
let mut input_file_lines: Vec<String> = Vec::new(); let mut input_file_lines: Vec<String> = Vec::new();
if let Ok(lines) = read_lines(file_path) { if let Ok(lines) = read_lines(file_path) {
lines.map_while(Result::ok).for_each(|line| { lines.map_while(Result::ok).for_each(|line| {
input_file_lines.push(line); input_file_lines.push(line);
}); });
} }
let mut input_file_words: Vec<Vec<String>> = Vec::new(); let mut input_words_by_line: Vec<Vec<String>> = Vec::new();
for line in input_file_lines { for line in input_file_lines {
let line_iter = line.split_whitespace(); let line_iter = line.split_whitespace();
let mut words: Vec<String> = Vec::new(); let mut words: Vec<String> = Vec::new();
@@ -61,15 +63,41 @@ pub mod utils {
let mut word_buffer: String; let mut word_buffer: String;
word_buffer = word.replace(&['(',')', '/', '\"', '\\', '<', '>', '*'], ""); word_buffer = word.replace(&['(',')', '/', '\"', '\\', '<', '>', '*'], "");
word_buffer = word_buffer.replace("ſ", "s"); word_buffer = word_buffer.replace("ſ", "s");
words.push(word_buffer); words.push(word_buffer);
} else { } else {
words.push(String::from(word)); words.push(String::from(word));
} }
} }
input_file_words.push(words); input_words_by_line.push(words);
} }
// Rebuild the text line by line: every whitespace-separated token is split
// into (leading punctuation, word characters, trailing punctuation) by `re2`,
// the word part is passed through `correct`, and the pieces are re-assembled.
// NOTE(review): `correct` is defined elsewhere in this module; it is presumably
// the spell-correction routine working against the word list at `list_path`.
let mut output: String = String::new();
for line in &input_words_by_line {
    for (index, word) in line.iter().enumerate() {
        // `split_whitespace` discarded the separators, so put a single space
        // back between tokens; otherwise all words on a line run together.
        if index > 0 {
            output.push(' ');
        }
        // Walk over *all* matches instead of only the first one, so that a
        // token such as `well-known` is not truncated after `well-`.
        let mut matched = false;
        for captures in re2.captures_iter(word) {
            matched = true;
            output.push_str(&captures["specialfront"]);
            output.push_str(correct(String::from(&captures["text"]), list_path.clone()).as_str());
            output.push_str(&captures["specialback"]);
        }
        // A token without any word character (pure punctuation) yields no
        // match; keep it verbatim rather than dropping it from the output.
        if !matched {
            output.push_str(word);
        }
    }
    output.push('\n');
}
// `create_new` refuses to overwrite: it fails if `output_path` already exists.
let mut output_file: File = match File::create_new(output_path) {
    Ok(f) => f,
    Err(e) => panic!("Cannot write output file: {}", e)
};
match write!(output_file, "{}", output) {
    // Still terminates the process here, as before, but with a success
    // status: the previous `exit(1)` reported failure after a successful write.
    Ok(_) => exit(0),
    Err(e) => panic!("Cannot write output file: {}", e)
};
} }
fn read_lines<P>(filename: P) -> io::Result<io::Lines<io::BufReader<File>>> fn read_lines<P>(filename: P) -> io::Result<io::Lines<io::BufReader<File>>>