various fixes

This commit is contained in:
2025-07-13 22:04:16 +02:00
parent 768bab6d57
commit 73ba03a816
4 changed files with 49 additions and 21 deletions

View File

@@ -15,8 +15,8 @@ daß dieſes prophetiſche Wunder ſymboliſche Bedeutung hat und
geiſtlich zu allen Zeiten in der Gemeinde wiederholt werden muß.
Wenigſtens müſſen wir beſtändig um die geiſtliche Erneuerung
deſſelben flehen. Wir ſehnen uns danach, wir erflehen ſie. Auch
unſere Brunnen ſind abgeſtanden , faul, vergiftet und hauchen
den Tod aus , der nicht eine Stadt und Gegend , ſondern eine
unſere Brunnen ſind abgeſtanden, faul, vergiftet und hauchen
den Tod aus, der nicht eine Stadt und Gegend, ſondern eine
Welt zu verderben droht, und in unſern Tagen ſchreklicher als
jemals vorher wüthet. Jene vergifteten Brunnen ſind die Wiſ-
ſenſc<aften, die in ſchreFlicher Lo8gebundenheit von allem

View File

@@ -1,11 +1,11 @@
pub mod utils {
use std::process::exit;
use fuzzt::algorithms::{levenshtein, sorensen_dice};
use regex::Regex;
use std::fs::File;
use std::io::{self, BufRead, Result, Write};
use std::path::Path;
use fuzzt::algorithms::{ levenshtein, sorensen_dice};
use regex::Regex;
use std::process::exit;
pub fn correct(word: String, list_path: String) -> String {
let mut list_correct_words: Vec<String> = Vec::new();
if let Ok(lines) = read_lines(list_path) {
@@ -16,7 +16,7 @@ pub mod utils {
let list_iter = list_correct_words.iter();
let mut has_match: bool = false;
for correct_word in list_iter.clone() {
if word.eq(correct_word) {
if word.eq(correct_word) || word.to_lowercase().eq(correct_word) {
has_match = true;
}
}
@@ -43,9 +43,9 @@ pub mod utils {
return String::from(closest);
}
}
pub fn correct_file (file_path: String, list_path: String, output_path: String) {
let re : Regex = Regex::new(r"[\w]+[\W]+").unwrap();
pub fn correct_file(file_path: String, list_path: String, output_path: String) {
let re: Regex = Regex::new(r"[\w]+[\W]+").unwrap();
let re2: Regex = Regex::new(r"(?P<specialfront>[\W&&[^\s]&&[^\n]]*)(?P<text>[\w]+)(?P<specialback>[\W&&[^\s]&&[^\n]]*)").unwrap();
let mut input_file_lines: Vec<String> = Vec::new();
if let Ok(lines) = read_lines(file_path) {
@@ -58,14 +58,15 @@ pub mod utils {
let line_iter = line.split_whitespace();
let mut words: Vec<String> = Vec::new();
for word in line_iter {
if re.is_match(word) {
let mut word_buffer: String;
word_buffer = word.replace(&['(',')', '/', '\"', '\\', '<', '>', '*'], "");
word_buffer =
word.replace(&['(', '/', '\"', '\\', '<', '>', '»'], "");
word_buffer = word_buffer.replace("ſ", "s");
words.push(word_buffer);
} else {
words.push(String::from(word));
let push_string = word.replace("ſ", "s");
words.push(String::from(push_string));
}
}
input_words_by_line.push(words);
@@ -83,27 +84,35 @@ pub mod utils {
special_chars_front = &captures["specialfront"];
special_chars_back = &captures["specialback"];
text = &captures["text"];
let mut out_str: String = String::from("");
out_str = out_str + special_chars_front + correct(String::from(text), list_path.clone()).as_str() + special_chars_back;
let mut out_str: String = String::from("");
out_str = out_str
+ special_chars_front
+ correct(String::from(text), list_path.clone()).as_str()
+ special_chars_back;
output.push_str(out_str.as_str());
output.push_str(" ");
} else {
output.push_str(correct(String::from(word), list_path.clone()).as_str());
output.push_str(" ");
}
}
output.push('\n');
}
let mut output_file: File = match File::create_new(output_path) {
Ok(f) => f,
Err(e) => panic!("Cannot write output file: {}", e)
Err(e) => panic!("Cannot write output file: {}", e),
};
match write!(output_file, "{}", output){
match write!(output_file, "{}", output) {
Ok(_) => exit(1),
Err(e) => panic!("Cannot write output file: {}", e)
Err(e) => panic!("Cannot write output file: {}", e),
};
}
/// Opens `filename` and returns an iterator over its lines.
///
/// The file is wrapped in a [`io::BufReader`], so lines are read lazily as
/// the iterator is advanced. Each item is an `io::Result<String>`.
///
/// # Errors
///
/// Returns the underlying [`io::Error`] if the file cannot be opened.
/// Read errors that occur later surface through the individual items of
/// the returned iterator.
fn read_lines<P>(filename: P) -> io::Result<io::Lines<io::BufReader<File>>>
where
    P: AsRef<Path>,
{
    let file = File::open(filename)?;
    Ok(io::BufReader::new(file).lines())
}
}

View File

@@ -20,6 +20,7 @@ enum Commands {
// Arguments holding a single input string and the path of a list file.
// NOTE(review): the `#[arg(...)]` attributes look like clap's derive API; the
// derive itself is not visible here — confirm. Plain `//` comments are used on
// purpose: with clap's derive, `///` doc comments on fields would become
// `--help` text and change the program's output.
struct WordArgs {
// Input value; `short,long` requests both a short and a long flag
// (presumably `-i` / `--input` — verify against the generated help).
#[arg(short,long)]
input: String,
// Path of the list file; also exposed with a short and a long flag.
#[arg(short,long)]
list_path: String
}
@@ -27,7 +28,9 @@ struct WordArgs {
// Arguments holding an input string, the path of a list file and an output
// string.
// NOTE(review): the `#[arg(...)]` attributes look like clap's derive API; the
// derive itself is not visible here — confirm. Plain `//` comments are used on
// purpose: with clap's derive, `///` doc comments on fields would become
// `--help` text and change the program's output.
struct FileArgs {
// Input value (presumably the path of the file to process — confirm against
// the caller); `short,long` requests both a short and a long flag.
#[arg(short,long)]
input: String,
// Path of the list file; also exposed with a short and a long flag.
#[arg(short,long)]
list_path: String,
// Output value (presumably the path the result is written to — confirm
// against the caller); also exposed with a short and a long flag.
#[arg(short,long)]
output: String
}

View File

@@ -1908814,3 +1908814,19 @@ zytotoxischen
zytotoxischer
zytotoxisches
zzgl
Ein
Vorrede
Thuet
Nicht-Seyn
thut
Als
daß
muß
Fuß
sey
bringet
Bringet
Schaale
Krummacher
theurer
seyn