various fixes
This commit is contained in:
@@ -15,8 +15,8 @@ daß dieſes prophetiſche Wunder ſymboliſche Bedeutung hat und
|
|||||||
geiſtlich zu allen Zeiten in der Gemeinde wiederholt werden muß.
|
geiſtlich zu allen Zeiten in der Gemeinde wiederholt werden muß.
|
||||||
Wenigſtens müſſen wir beſtändig um die geiſtliche Erneuerung
|
Wenigſtens müſſen wir beſtändig um die geiſtliche Erneuerung
|
||||||
deſſelben flehen. Wir ſehnen uns danach, wir erflehen ſie. Auch
|
deſſelben flehen. Wir ſehnen uns danach, wir erflehen ſie. Auch
|
||||||
unſere Brunnen ſind abgeſtanden , faul, vergiftet und hauchen
|
unſere Brunnen ſind abgeſtanden, faul, vergiftet und hauchen
|
||||||
den Tod aus , der nicht eine Stadt und Gegend , ſondern eine
|
den Tod aus, der nicht eine Stadt und Gegend, ſondern eine
|
||||||
Welt zu verderben droht, und in unſern Tagen ſchreklicher als
|
Welt zu verderben droht, und in unſern Tagen ſchreklicher als
|
||||||
jemals vorher wüthet. Jene vergifteten Brunnen ſind die Wiſ-
|
jemals vorher wüthet. Jene vergifteten Brunnen ſind die Wiſ-
|
||||||
ſenſc<aften, die in ſchreFlicher Lo8gebundenheit von allem
|
ſenſc<aften, die in ſchreFlicher Lo8gebundenheit von allem
|
||||||
|
47
src/lib.rs
47
src/lib.rs
@@ -1,11 +1,11 @@
|
|||||||
pub mod utils {
|
pub mod utils {
|
||||||
use std::process::exit;
|
use fuzzt::algorithms::{levenshtein, sorensen_dice};
|
||||||
|
use regex::Regex;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{self, BufRead, Result, Write};
|
use std::io::{self, BufRead, Result, Write};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use fuzzt::algorithms::{ levenshtein, sorensen_dice};
|
use std::process::exit;
|
||||||
use regex::Regex;
|
|
||||||
|
|
||||||
pub fn correct(word: String, list_path: String) -> String {
|
pub fn correct(word: String, list_path: String) -> String {
|
||||||
let mut list_correct_words: Vec<String> = Vec::new();
|
let mut list_correct_words: Vec<String> = Vec::new();
|
||||||
if let Ok(lines) = read_lines(list_path) {
|
if let Ok(lines) = read_lines(list_path) {
|
||||||
@@ -16,7 +16,7 @@ pub mod utils {
|
|||||||
let list_iter = list_correct_words.iter();
|
let list_iter = list_correct_words.iter();
|
||||||
let mut has_match: bool = false;
|
let mut has_match: bool = false;
|
||||||
for correct_word in list_iter.clone() {
|
for correct_word in list_iter.clone() {
|
||||||
if word.eq(correct_word) {
|
if word.eq(correct_word) || word.to_lowercase().eq(correct_word) {
|
||||||
has_match = true;
|
has_match = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -43,9 +43,9 @@ pub mod utils {
|
|||||||
return String::from(closest);
|
return String::from(closest);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn correct_file (file_path: String, list_path: String, output_path: String) {
|
pub fn correct_file(file_path: String, list_path: String, output_path: String) {
|
||||||
let re : Regex = Regex::new(r"[\w]+[\W]+").unwrap();
|
let re: Regex = Regex::new(r"[\w]+[\W]+").unwrap();
|
||||||
let re2: Regex = Regex::new(r"(?P<specialfront>[\W&&[^\s]&&[^\n]]*)(?P<text>[\w]+)(?P<specialback>[\W&&[^\s]&&[^\n]]*)").unwrap();
|
let re2: Regex = Regex::new(r"(?P<specialfront>[\W&&[^\s]&&[^\n]]*)(?P<text>[\w]+)(?P<specialback>[\W&&[^\s]&&[^\n]]*)").unwrap();
|
||||||
let mut input_file_lines: Vec<String> = Vec::new();
|
let mut input_file_lines: Vec<String> = Vec::new();
|
||||||
if let Ok(lines) = read_lines(file_path) {
|
if let Ok(lines) = read_lines(file_path) {
|
||||||
@@ -58,14 +58,15 @@ pub mod utils {
|
|||||||
let line_iter = line.split_whitespace();
|
let line_iter = line.split_whitespace();
|
||||||
let mut words: Vec<String> = Vec::new();
|
let mut words: Vec<String> = Vec::new();
|
||||||
for word in line_iter {
|
for word in line_iter {
|
||||||
|
|
||||||
if re.is_match(word) {
|
if re.is_match(word) {
|
||||||
let mut word_buffer: String;
|
let mut word_buffer: String;
|
||||||
word_buffer = word.replace(&['(',')', '/', '\"', '\\', '<', '>', '*'], "");
|
word_buffer =
|
||||||
|
word.replace(&['(', '/', '\"', '\\', '<', '>', '»'], "");
|
||||||
word_buffer = word_buffer.replace("ſ", "s");
|
word_buffer = word_buffer.replace("ſ", "s");
|
||||||
words.push(word_buffer);
|
words.push(word_buffer);
|
||||||
} else {
|
} else {
|
||||||
words.push(String::from(word));
|
let push_string = word.replace("ſ", "s");
|
||||||
|
words.push(String::from(push_string));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
input_words_by_line.push(words);
|
input_words_by_line.push(words);
|
||||||
@@ -83,27 +84,35 @@ pub mod utils {
|
|||||||
special_chars_front = &captures["specialfront"];
|
special_chars_front = &captures["specialfront"];
|
||||||
special_chars_back = &captures["specialback"];
|
special_chars_back = &captures["specialback"];
|
||||||
text = &captures["text"];
|
text = &captures["text"];
|
||||||
let mut out_str: String = String::from("");
|
let mut out_str: String = String::from("");
|
||||||
out_str = out_str + special_chars_front + correct(String::from(text), list_path.clone()).as_str() + special_chars_back;
|
out_str = out_str
|
||||||
|
+ special_chars_front
|
||||||
|
+ correct(String::from(text), list_path.clone()).as_str()
|
||||||
|
+ special_chars_back;
|
||||||
output.push_str(out_str.as_str());
|
output.push_str(out_str.as_str());
|
||||||
|
output.push_str(" ");
|
||||||
|
} else {
|
||||||
|
output.push_str(correct(String::from(word), list_path.clone()).as_str());
|
||||||
|
output.push_str(" ");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
output.push('\n');
|
output.push('\n');
|
||||||
}
|
}
|
||||||
let mut output_file: File = match File::create_new(output_path) {
|
let mut output_file: File = match File::create_new(output_path) {
|
||||||
Ok(f) => f,
|
Ok(f) => f,
|
||||||
Err(e) => panic!("Cannot write output file: {}", e)
|
Err(e) => panic!("Cannot write output file: {}", e),
|
||||||
};
|
};
|
||||||
match write!(output_file, "{}", output){
|
match write!(output_file, "{}", output) {
|
||||||
Ok(_) => exit(1),
|
Ok(_) => exit(1),
|
||||||
Err(e) => panic!("Cannot write output file: {}", e)
|
Err(e) => panic!("Cannot write output file: {}", e),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
fn read_lines<P>(filename: P) -> io::Result<io::Lines<io::BufReader<File>>>
|
fn read_lines<P>(filename: P) -> io::Result<io::Lines<io::BufReader<File>>>
|
||||||
where P: AsRef<Path>, {
|
where
|
||||||
|
P: AsRef<Path>,
|
||||||
|
{
|
||||||
let file = File::open(filename)?;
|
let file = File::open(filename)?;
|
||||||
Ok(io::BufReader::new(file).lines())
|
Ok(io::BufReader::new(file).lines())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -20,6 +20,7 @@ enum Commands {
|
|||||||
struct WordArgs {
|
struct WordArgs {
|
||||||
#[arg(short,long)]
|
#[arg(short,long)]
|
||||||
input: String,
|
input: String,
|
||||||
|
#[arg(short,long)]
|
||||||
list_path: String
|
list_path: String
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -27,7 +28,9 @@ struct WordArgs {
|
|||||||
struct FileArgs {
|
struct FileArgs {
|
||||||
#[arg(short,long)]
|
#[arg(short,long)]
|
||||||
input: String,
|
input: String,
|
||||||
|
#[arg(short,long)]
|
||||||
list_path: String,
|
list_path: String,
|
||||||
|
#[arg(short,long)]
|
||||||
output: String
|
output: String
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1908814,3 +1908814,19 @@ zytotoxischen
|
|||||||
zytotoxischer
|
zytotoxischer
|
||||||
zytotoxisches
|
zytotoxisches
|
||||||
zzgl
|
zzgl
|
||||||
|
Ein
|
||||||
|
Vorrede
|
||||||
|
Thuet
|
||||||
|
Nicht-Seyn
|
||||||
|
thut
|
||||||
|
Als
|
||||||
|
daß
|
||||||
|
muß
|
||||||
|
Fuß
|
||||||
|
sey
|
||||||
|
bringet
|
||||||
|
Bringet
|
||||||
|
Schaale
|
||||||
|
Krummacher
|
||||||
|
theurer
|
||||||
|
seyn
|
||||||
|
Reference in New Issue
Block a user