various fixes

This commit is contained in:
2025-07-13 22:04:16 +02:00
parent 768bab6d57
commit 73ba03a816
4 changed files with 49 additions and 21 deletions

View File

@@ -15,8 +15,8 @@ daß dieſes prophetiſche Wunder ſymboliſche Bedeutung hat und
geiſtlich zu allen Zeiten in der Gemeinde wiederholt werden muß. geiſtlich zu allen Zeiten in der Gemeinde wiederholt werden muß.
Wenigſtens müſſen wir beſtändig um die geiſtliche Erneuerung Wenigſtens müſſen wir beſtändig um die geiſtliche Erneuerung
deſſelben flehen. Wir ſehnen uns danach, wir erflehen ſie. Auch deſſelben flehen. Wir ſehnen uns danach, wir erflehen ſie. Auch
unſere Brunnen ſind abgeſtanden , faul, vergiftet und hauchen unſere Brunnen ſind abgeſtanden, faul, vergiftet und hauchen
den Tod aus , der nicht eine Stadt und Gegend , ſondern eine den Tod aus, der nicht eine Stadt und Gegend, ſondern eine
Welt zu verderben droht, und in unſern Tagen ſchreklicher als Welt zu verderben droht, und in unſern Tagen ſchreklicher als
jemals vorher wüthet. Jene vergifteten Brunnen ſind die Wiſ- jemals vorher wüthet. Jene vergifteten Brunnen ſind die Wiſ-
ſenſc<aften, die in ſchreFlicher Lo8gebundenheit von allem ſenſc<aften, die in ſchreFlicher Lo8gebundenheit von allem

View File

@@ -1,11 +1,11 @@
pub mod utils { pub mod utils {
use std::process::exit; use fuzzt::algorithms::{levenshtein, sorensen_dice};
use regex::Regex;
use std::fs::File; use std::fs::File;
use std::io::{self, BufRead, Result, Write}; use std::io::{self, BufRead, Result, Write};
use std::path::Path; use std::path::Path;
use fuzzt::algorithms::{ levenshtein, sorensen_dice}; use std::process::exit;
use regex::Regex;
pub fn correct(word: String, list_path: String) -> String { pub fn correct(word: String, list_path: String) -> String {
let mut list_correct_words: Vec<String> = Vec::new(); let mut list_correct_words: Vec<String> = Vec::new();
if let Ok(lines) = read_lines(list_path) { if let Ok(lines) = read_lines(list_path) {
@@ -16,7 +16,7 @@ pub mod utils {
let list_iter = list_correct_words.iter(); let list_iter = list_correct_words.iter();
let mut has_match: bool = false; let mut has_match: bool = false;
for correct_word in list_iter.clone() { for correct_word in list_iter.clone() {
if word.eq(correct_word) { if word.eq(correct_word) || word.to_lowercase().eq(correct_word) {
has_match = true; has_match = true;
} }
} }
@@ -43,9 +43,9 @@ pub mod utils {
return String::from(closest); return String::from(closest);
} }
} }
pub fn correct_file (file_path: String, list_path: String, output_path: String) { pub fn correct_file(file_path: String, list_path: String, output_path: String) {
let re : Regex = Regex::new(r"[\w]+[\W]+").unwrap(); let re: Regex = Regex::new(r"[\w]+[\W]+").unwrap();
let re2: Regex = Regex::new(r"(?P<specialfront>[\W&&[^\s]&&[^\n]]*)(?P<text>[\w]+)(?P<specialback>[\W&&[^\s]&&[^\n]]*)").unwrap(); let re2: Regex = Regex::new(r"(?P<specialfront>[\W&&[^\s]&&[^\n]]*)(?P<text>[\w]+)(?P<specialback>[\W&&[^\s]&&[^\n]]*)").unwrap();
let mut input_file_lines: Vec<String> = Vec::new(); let mut input_file_lines: Vec<String> = Vec::new();
if let Ok(lines) = read_lines(file_path) { if let Ok(lines) = read_lines(file_path) {
@@ -58,14 +58,15 @@ pub mod utils {
let line_iter = line.split_whitespace(); let line_iter = line.split_whitespace();
let mut words: Vec<String> = Vec::new(); let mut words: Vec<String> = Vec::new();
for word in line_iter { for word in line_iter {
if re.is_match(word) { if re.is_match(word) {
let mut word_buffer: String; let mut word_buffer: String;
word_buffer = word.replace(&['(',')', '/', '\"', '\\', '<', '>', '*'], ""); word_buffer =
word.replace(&['(', '/', '\"', '\\', '<', '>', '»'], "");
word_buffer = word_buffer.replace("ſ", "s"); word_buffer = word_buffer.replace("ſ", "s");
words.push(word_buffer); words.push(word_buffer);
} else { } else {
words.push(String::from(word)); let push_string = word.replace("ſ", "s");
words.push(String::from(push_string));
} }
} }
input_words_by_line.push(words); input_words_by_line.push(words);
@@ -83,27 +84,35 @@ pub mod utils {
special_chars_front = &captures["specialfront"]; special_chars_front = &captures["specialfront"];
special_chars_back = &captures["specialback"]; special_chars_back = &captures["specialback"];
text = &captures["text"]; text = &captures["text"];
let mut out_str: String = String::from(""); let mut out_str: String = String::from("");
out_str = out_str + special_chars_front + correct(String::from(text), list_path.clone()).as_str() + special_chars_back; out_str = out_str
+ special_chars_front
+ correct(String::from(text), list_path.clone()).as_str()
+ special_chars_back;
output.push_str(out_str.as_str()); output.push_str(out_str.as_str());
output.push_str(" ");
} else {
output.push_str(correct(String::from(word), list_path.clone()).as_str());
output.push_str(" ");
} }
} }
output.push('\n'); output.push('\n');
} }
let mut output_file: File = match File::create_new(output_path) { let mut output_file: File = match File::create_new(output_path) {
Ok(f) => f, Ok(f) => f,
Err(e) => panic!("Cannot write output file: {}", e) Err(e) => panic!("Cannot write output file: {}", e),
}; };
match write!(output_file, "{}", output){ match write!(output_file, "{}", output) {
Ok(_) => exit(1), Ok(_) => exit(1),
Err(e) => panic!("Cannot write output file: {}", e) Err(e) => panic!("Cannot write output file: {}", e),
}; };
} }
/// Opens the file at `filename` and returns an iterator over its lines.
///
/// The file is wrapped in an [`io::BufReader`]; every item produced by the
/// returned iterator is an `io::Result<String>`, so a read failure can
/// still surface while iterating.
///
/// # Errors
///
/// Returns the error reported by [`File::open`] when the file cannot be
/// opened.
fn read_lines<P>(filename: P) -> io::Result<io::Lines<io::BufReader<File>>>
where
    P: AsRef<Path>,
{
    let file = File::open(filename)?;
    Ok(io::BufReader::new(file).lines())
}
} }

View File

@@ -20,6 +20,7 @@ enum Commands {
struct WordArgs { struct WordArgs {
#[arg(short,long)] #[arg(short,long)]
input: String, input: String,
#[arg(short,long)]
list_path: String list_path: String
} }
@@ -27,7 +28,9 @@ struct WordArgs {
struct FileArgs { struct FileArgs {
#[arg(short,long)] #[arg(short,long)]
input: String, input: String,
#[arg(short,long)]
list_path: String, list_path: String,
#[arg(short,long)]
output: String output: String
} }

View File

@@ -1908814,3 +1908814,19 @@ zytotoxischen
zytotoxischer zytotoxischer
zytotoxisches zytotoxisches
zzgl zzgl
Ein
Vorrede
Thuet
Nicht-Seyn
thut
Als
daß
muß
Fuß
sey
bringet
Bringet
Schaale
Krummacher
theurer
seyn