Compare commits
2 Commits
Author | SHA1 | Date | |
---|---|---|---|
167036e144 | |||
e1555e1432 |
157
Cargo.lock
generated
157
Cargo.lock
generated
@@ -89,10 +89,10 @@ version = "4.5.41"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"heck 0.5.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"syn 2.0.104",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -107,36 +107,113 @@ version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-deque"
|
||||
version = "0.8.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
|
||||
dependencies = [
|
||||
"crossbeam-epoch",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-epoch"
|
||||
version = "0.9.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
|
||||
dependencies = [
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-utils"
|
||||
version = "0.8.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.15.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
||||
|
||||
[[package]]
|
||||
name = "fuzzt"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a15f3d0fa42283a765e5fb609683ddab4ee4ff245d8db66a24d926c05e518c6"
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
|
||||
|
||||
[[package]]
|
||||
name = "is_terminal_polyfill"
|
||||
version = "1.70.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
|
||||
|
||||
[[package]]
|
||||
name = "levenshtein"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "db13adb97ab515a3691f56e4dbab09283d0b86cb45abd991d8634a9d6f501760"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.174"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.27"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
|
||||
|
||||
[[package]]
|
||||
name = "maplit"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.7.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
|
||||
|
||||
[[package]]
|
||||
name = "num_cpus"
|
||||
version = "1.17.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell_polyfill"
|
||||
version = "1.70.1"
|
||||
@@ -161,6 +238,26 @@ dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon"
|
||||
version = "1.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
|
||||
dependencies = [
|
||||
"either",
|
||||
"rayon-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon-core"
|
||||
version = "1.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
|
||||
dependencies = [
|
||||
"crossbeam-deque",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.11.1"
|
||||
@@ -190,12 +287,62 @@ version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
|
||||
|
||||
[[package]]
|
||||
name = "rustversion"
|
||||
version = "1.0.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d"
|
||||
|
||||
[[package]]
|
||||
name = "spinners"
|
||||
version = "4.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a0ef947f358b9c238923f764c72a4a9d42f2d637c46e059dbd319d6e7cfb4f82"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
"maplit",
|
||||
"strum",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
||||
|
||||
[[package]]
|
||||
name = "strum"
|
||||
version = "0.24.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
|
||||
dependencies = [
|
||||
"strum_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strum_macros"
|
||||
version = "0.24.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
|
||||
dependencies = [
|
||||
"heck 0.4.1",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.109"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.104"
|
||||
@@ -209,12 +356,16 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "text-correction"
|
||||
version = "0.1.0"
|
||||
version = "0.1.1"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"fuzzt",
|
||||
"levenshtein",
|
||||
"log",
|
||||
"num_cpus",
|
||||
"rayon",
|
||||
"regex",
|
||||
"spinners",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
15
Cargo.toml
15
Cargo.toml
@@ -1,10 +1,23 @@
|
||||
[package]
|
||||
name = "text-correction"
|
||||
version = "0.1.0"
|
||||
version = "0.1.1"
|
||||
edition = "2021"
|
||||
license-file = "LICENSE"
|
||||
readme = "README.md"
|
||||
description = "Small CLI tool that corrects a given input file based on a word list given as input."
|
||||
authors = ["nihil carcosa <nihil@valhrafnaz.gay>"]
|
||||
homepage = "https://valhrafnaz.gay"
|
||||
repository = "https://git.valhrafnaz.gay/valhrafnaz/text-correction"
|
||||
categories = ["command-line-utilities"]
|
||||
keywords = ["text-processing"]
|
||||
publish = ["gitea"]
|
||||
|
||||
[dependencies]
|
||||
clap = { version = "4.5.41", features = ["derive"] }
|
||||
fuzzt = "0.3.1"
|
||||
levenshtein = "1.0.5"
|
||||
log = "0.4.27"
|
||||
num_cpus = "1.17.0"
|
||||
rayon = "1.10.0"
|
||||
regex = "1.11.1"
|
||||
spinners = "4.1.1"
|
||||
|
32
output.txt
Normal file
32
output.txt
Normal file
@@ -0,0 +1,32 @@
|
||||
Vorrede,
|
||||
|
||||
Ein schönes Wort jenes Wort des Propheten: Thuet
|
||||
Salz darein!“
|
||||
|
||||
Als zu dem Propheten Elisa die Männer von Jericho kam
|
||||
amen und klagten, daß das Wasser der Stadt böse und das Land
|
||||
unfruchtbar sey, sprach er: Bringet mir her eine neue Schaale
|
||||
und thut Salz darein!“ und sie brachten es ihm Hz da ging
|
||||
er hinaus zu der Wasserquelle und warf das Salz hinein und
|
||||
machte sie mit dem Worte des Herrn gesund*).
|
||||
|
||||
unser theurer Krummacher hat es am lebhaftesten gefühlt,
|
||||
daß dieses prophetische runder symbolische Bedeutung hat und
|
||||
geistlich zu allen Zeiten in der Gemeinde wiederholt werden muß.
|
||||
wenigstens müssen wir beständig um die geistliche Erneuerung
|
||||
desselben flehen. Wirt sehnen uns danach, wir erflehen sie. auch
|
||||
unsere Brunnen sind abgestanden, faul, vergiftet und hauchen
|
||||
den Tod aus, der nicht eine Stadt und Gegend, sondern eine
|
||||
Welt zu verderben droht, und in unser Tagens schrecklicher als
|
||||
jemals vorher wütet. Gene vergifteten Brunnen sind die bis-
|
||||
sesshaften, die in schrecklicher Loßgebundenheit von allem
|
||||
göttlichen mit ihren selbstgemachten Gesetzen das gesamte Unis-
|
||||
versus zu umschließen sich anmaßen die Künste, die ihrem
|
||||
ursprünglichen Beruf, Weissagerinnen zu seyn vom jenseits,
|
||||
hohnlachend Chalet gegeben haben, um die Sünde mit dem Glanze
|
||||
der Verklärung zu umwerben Hz; eine Theologie, die aus dem
|
||||
Eignens redet, wie der Vater der Lügens, und die inwendig ca-
|
||||
kanaanitisch gesinnt ficht, den Leviten AA Rob heuchlerisch) umgeworfen
|
||||
hat; eine Philosophie, welche das Nichte- des Allee-
|
||||
|
||||
Hegel übe. Kunst au. Re. AA
|
171
src/lib.rs
171
src/lib.rs
@@ -1,12 +1,14 @@
|
||||
pub mod utils {
|
||||
use std::cell::OnceCell;
|
||||
use std::cmp::Ordering;
|
||||
use fuzzt::algorithms::{levenshtein, sorensen_dice};
|
||||
use regex::Regex;
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufRead, Result, Write};
|
||||
use std::path::Path;
|
||||
use std::process::exit;
|
||||
use rayon::prelude::*;
|
||||
|
||||
pub fn correct(word: String, list_path: String) -> String {
|
||||
pub fn correct(word: String, list_path: &str) -> String {
|
||||
let mut list_correct_words: Vec<String> = Vec::new();
|
||||
if let Ok(lines) = read_lines(list_path) {
|
||||
lines.map_while(Result::ok).for_each(|line| {
|
||||
@@ -14,37 +16,79 @@ pub mod utils {
|
||||
});
|
||||
}
|
||||
let list_iter = list_correct_words.iter();
|
||||
let mut has_match: bool = false;
|
||||
for correct_word in list_iter.clone() {
|
||||
if word.eq(correct_word) || word.to_lowercase().eq(correct_word) {
|
||||
has_match = true;
|
||||
}
|
||||
}
|
||||
if has_match {
|
||||
return word;
|
||||
} else {
|
||||
let mut closest: &str = "";
|
||||
let mut closest_dist: usize = 10000;
|
||||
let mut closest_dist_sorensen: f64 = 0.0f64;
|
||||
for correct_word in list_iter {
|
||||
//println!("Checking {}, dist: {}, current closest: {}, (damerau: {}, Sørensen-Dice: {}, Jaro-Winkler: {})", correct_word.as_str(), levenshtein(word.as_str(), correct_word.as_str()), closest_dist, damerau_levenshtein(word.as_str(), correct_word.as_str()), sorensen_dice(word.as_str(), correct_word.as_str()), jaro_winkler(word.as_str(), correct_word.as_str()));
|
||||
let dist = levenshtein(word.as_str(), correct_word.as_str());
|
||||
if dist < closest_dist {
|
||||
let mut closest: &str = "";
|
||||
let mut closest_dist: usize = 10000;
|
||||
let mut closest_dist_sorensen: f64 = 0.0f64;
|
||||
for correct_word in list_iter {
|
||||
let dist = levenshtein(word.as_str(), correct_word.as_str());
|
||||
if dist < closest_dist {
|
||||
closest_dist = dist;
|
||||
closest = correct_word;
|
||||
closest_dist_sorensen = sorensen_dice(word.as_str(), correct_word.as_str());
|
||||
} else if dist == closest_dist {
|
||||
if sorensen_dice(word.as_str(), correct_word.as_str()) > closest_dist_sorensen {
|
||||
closest_dist = dist;
|
||||
closest = correct_word;
|
||||
closest_dist_sorensen = sorensen_dice(word.as_str(), correct_word.as_str());
|
||||
} else if dist == closest_dist {
|
||||
if sorensen_dice(word.as_str(), correct_word.as_str()) > closest_dist_sorensen {
|
||||
closest_dist = dist;
|
||||
closest = correct_word;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if closest_dist == 0 {
|
||||
return String::from(word);
|
||||
} else {
|
||||
return String::from(closest);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn correct_file(file_path: String, list_path: String, output_path: String) {
|
||||
struct Distances<'a> {
|
||||
target: &'a str,
|
||||
candidate: &'a str,
|
||||
|
||||
levenshtein: usize,
|
||||
sorensen: OnceCell<f64>,
|
||||
}
|
||||
impl<'a> Distances<'a> {
|
||||
fn new(target: &'a str, candidate: &'a str) -> Self {
|
||||
Self {
|
||||
target,
|
||||
candidate,
|
||||
levenshtein: levenshtein(target, candidate),
|
||||
sorensen: OnceCell::new(),
|
||||
}
|
||||
}
|
||||
fn calc_sorensen(&self) -> &f64 {
|
||||
self.sorensen
|
||||
.get_or_init(|| sorensen_dice(self.target, self.candidate))
|
||||
}
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
assert_eq!(self.target, other.target);
|
||||
match usize::cmp(&self.levenshtein, &other.levenshtein) {
|
||||
Ordering::Less => Ordering::Less,
|
||||
Ordering::Equal => {
|
||||
// intentionally reverse order here
|
||||
f64::total_cmp(other.calc_sorensen(), self.calc_sorensen())
|
||||
}
|
||||
Ordering::Greater => Ordering::Greater,
|
||||
}
|
||||
}
|
||||
}
|
||||
fn find<'a>(target: &'a str, candidates: &'a [String]) -> Option<Distances<'a>> {
|
||||
candidates
|
||||
.par_iter() // or .iter() for single-threaded
|
||||
.map(|candidate| Distances::new(target, candidate))
|
||||
.min_by(Distances::cmp)
|
||||
}
|
||||
|
||||
pub fn correct_concurr(word: String, word_list: &[String]) -> String {
|
||||
let distances = find(&word,word_list).unwrap();
|
||||
if distances.levenshtein == 0 {
|
||||
return String::from(word);
|
||||
} else {
|
||||
return String::from(distances.candidate);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
pub fn correct_file(file_path: String, list_path: &str, output_path: String) {
|
||||
let re: Regex = Regex::new(r"[\w]+[\W]+").unwrap();
|
||||
let re2: Regex = Regex::new(r"(?P<specialfront>[\W&&[^\s]&&[^\n]]*)(?P<text>[\w]+)(?P<specialback>[\W&&[^\s]&&[^\n]]*)").unwrap();
|
||||
let mut input_file_lines: Vec<String> = Vec::new();
|
||||
@@ -53,6 +97,7 @@ pub mod utils {
|
||||
input_file_lines.push(line);
|
||||
});
|
||||
}
|
||||
|
||||
let mut input_words_by_line: Vec<Vec<String>> = Vec::new();
|
||||
for line in input_file_lines {
|
||||
let line_iter = line.split_whitespace();
|
||||
@@ -87,12 +132,12 @@ pub mod utils {
|
||||
let mut out_str: String = String::from("");
|
||||
out_str = out_str
|
||||
+ special_chars_front
|
||||
+ correct(String::from(text), list_path.clone()).as_str()
|
||||
+ correct(String::from(text), list_path).as_str()
|
||||
+ special_chars_back;
|
||||
output.push_str(out_str.as_str());
|
||||
output.push_str(" ");
|
||||
} else {
|
||||
output.push_str(correct(String::from(word), list_path.clone()).as_str());
|
||||
output.push_str(correct(String::from(word), list_path).as_str());
|
||||
output.push_str(" ");
|
||||
}
|
||||
}
|
||||
@@ -103,7 +148,77 @@ pub mod utils {
|
||||
Err(e) => panic!("Cannot write output file: {}", e),
|
||||
};
|
||||
match write!(output_file, "{}", output) {
|
||||
Ok(_) => exit(1),
|
||||
Ok(_) => (),
|
||||
Err(e) => panic!("Cannot write output file: {}", e),
|
||||
};
|
||||
}
|
||||
|
||||
pub fn correct_file_concurr(file_path: String, list_path: &str, output_path: String) {
|
||||
let re: Regex = Regex::new(r"[\w]+[\W]+").unwrap();
|
||||
let re2: Regex = Regex::new(r"(?P<specialfront>[\W&&[^\s]&&[^\n]]*)(?P<text>[\w]+)(?P<specialback>[\W&&[^\s]&&[^\n]]*)").unwrap();
|
||||
let mut input_file_lines: Vec<String> = Vec::new();
|
||||
if let Ok(lines) = read_lines(file_path) {
|
||||
lines.map_while(Result::ok).for_each(|line| {
|
||||
input_file_lines.push(line);
|
||||
});
|
||||
}
|
||||
let mut input_words_by_line: Vec<Vec<String>> = Vec::new();
|
||||
for line in input_file_lines {
|
||||
let line_iter = line.split_whitespace();
|
||||
let mut words: Vec<String> = Vec::new();
|
||||
for word in line_iter {
|
||||
if re.is_match(word) {
|
||||
let mut word_buffer: String;
|
||||
word_buffer =
|
||||
word.replace(&['(', '/', '\"', '\\', '<', '>', '»'], "");
|
||||
word_buffer = word_buffer.replace("ſ", "s");
|
||||
words.push(word_buffer);
|
||||
} else {
|
||||
let push_string = word.replace("ſ", "s");
|
||||
words.push(String::from(push_string));
|
||||
}
|
||||
}
|
||||
input_words_by_line.push(words);
|
||||
}
|
||||
let mut list_correct_words: Vec<String> = Vec::new();
|
||||
if let Ok(lines) = read_lines(list_path) {
|
||||
lines.map_while(Result::ok).for_each(|line| {
|
||||
list_correct_words.push(line);
|
||||
});
|
||||
}
|
||||
let mut output: String = String::new();
|
||||
let input_line_iter = input_words_by_line.iter();
|
||||
for line in input_line_iter {
|
||||
let input_word_iter = line.iter();
|
||||
for word in input_word_iter {
|
||||
let special_chars_front: &str;
|
||||
let special_chars_back: &str;
|
||||
let text: &str;
|
||||
if re.is_match(&word) {
|
||||
let captures = re2.captures(&word).unwrap();
|
||||
special_chars_front = &captures["specialfront"];
|
||||
special_chars_back = &captures["specialback"];
|
||||
text = &captures["text"];
|
||||
let mut out_str: String = String::from("");
|
||||
out_str = out_str
|
||||
+ special_chars_front
|
||||
+ correct_concurr(String::from(text), &list_correct_words).as_str()
|
||||
+ special_chars_back;
|
||||
output.push_str(out_str.as_str());
|
||||
output.push_str(" ");
|
||||
} else {
|
||||
output.push_str(correct_concurr(String::from(word), &list_correct_words).as_str());
|
||||
output.push_str(" ");
|
||||
}
|
||||
}
|
||||
output.push('\n');
|
||||
}
|
||||
let mut output_file: File = match File::create_new(output_path) {
|
||||
Ok(f) => f,
|
||||
Err(e) => panic!("Cannot write output file: {}", e),
|
||||
};
|
||||
match write!(output_file, "{}", output) {
|
||||
Ok(_) => (),
|
||||
Err(e) => panic!("Cannot write output file: {}", e),
|
||||
};
|
||||
}
|
||||
|
55
src/main.rs
55
src/main.rs
@@ -1,8 +1,13 @@
|
||||
use clap::{Args, Parser, Subcommand};
|
||||
use text_correction::utils;
|
||||
use std::time::SystemTime;
|
||||
use std::process::*;
|
||||
use std::path::Path;
|
||||
use spinners::{Spinner, Spinners};
|
||||
use log::{error, trace};
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(name = "german word corrector")]
|
||||
#[command(name = "word corrector")]
|
||||
#[command(version, about, long_about = None)]
|
||||
#[command(next_line_help = true)]
|
||||
struct Cli {
|
||||
@@ -13,7 +18,8 @@ struct Cli {
|
||||
#[derive(Subcommand)]
|
||||
enum Commands {
|
||||
CorrectWord(WordArgs),
|
||||
CorrectFile(FileArgs)
|
||||
CorrectFile(FileArgs),
|
||||
BenchFile(FileArgs)
|
||||
}
|
||||
|
||||
#[derive(Args)]
|
||||
@@ -31,7 +37,9 @@ struct FileArgs {
|
||||
#[arg(short,long)]
|
||||
list_path: String,
|
||||
#[arg(short,long)]
|
||||
output: String
|
||||
output: String,
|
||||
#[arg(short,long)]
|
||||
overwrite: bool
|
||||
}
|
||||
|
||||
fn main() {
|
||||
@@ -39,11 +47,48 @@ fn main() {
|
||||
|
||||
match &cli.command {
|
||||
Commands::CorrectWord(args) => {
|
||||
let out: String = utils::correct(args.input.clone(), args.list_path.clone());
|
||||
let out: String = utils::correct(args.input.clone(), args.list_path.as_str());
|
||||
println!("{}", out);
|
||||
},
|
||||
Commands::CorrectFile(args) => {
|
||||
utils::correct_file(args.input.clone(), args.list_path.clone(), args.output.clone())
|
||||
if args.overwrite == true {
|
||||
trace!("Overwriting old file due to cli argument.");
|
||||
std::fs::remove_file(args.output.clone()).unwrap();
|
||||
} else {
|
||||
trace!("Checking whether destination is writable.");
|
||||
let path = "./".to_owned() + args.output.as_str();
|
||||
if Path::new(path.as_str()).exists() {
|
||||
error!("File already exists!");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
let mut sp = Spinner::new(Spinners::Dots, "Processing file...".into());
|
||||
utils::correct_file_concurr(args.input.clone(), args.list_path.as_str(), args.output.clone());
|
||||
sp.stop_with_message("".into());
|
||||
},
|
||||
Commands::BenchFile(args) => {
|
||||
let start_par = SystemTime::now();
|
||||
utils::correct_file_concurr(args.input.clone(), args.list_path.as_str(), args.output.clone());
|
||||
let stop_par = match start_par.elapsed() {
|
||||
Ok(elapsed) => elapsed.as_millis(),
|
||||
Err(e) => {
|
||||
println!("Error: {e:?}");
|
||||
exit(1);
|
||||
}
|
||||
};
|
||||
println!("Parallel processing took: {stop_par:?} ms");
|
||||
std::fs::remove_file(args.output.clone()).unwrap();
|
||||
let start = SystemTime::now();
|
||||
utils::correct_file(args.input.clone(), args.list_path.as_str(), args.output.clone());
|
||||
let stop = match start.elapsed() {
|
||||
Ok(elapsed) => elapsed.as_millis(),
|
||||
Err(e) => {
|
||||
println!("Error: {e:?}");
|
||||
exit(1);
|
||||
}
|
||||
};
|
||||
println!("Single-thread processing took: {stop:?} ms");
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user