Skip to content

Commit

Permalink
Fix the error message when an invalid file or path is specified (#439)
Browse files Browse the repository at this point in the history
* Fix the error message when an invalid file or path is specified

* Remove debug print

* Fix expected message for Windows
  • Loading branch information
mosuka authored Dec 8, 2024
1 parent f114f3f commit 0a92b32
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 9 deletions.
75 changes: 66 additions & 9 deletions lindera/src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,20 @@ fn yaml_to_config(file_path: &Path) -> LinderaResult<TokenizerConfig> {
.read_to_end(&mut buffer)
.map_err(|err| LinderaErrorKind::Io.with_error(anyhow::anyhow!(err)))?;

let tokenizer_config: TokenizerConfig = serde_yaml::from_slice::<TokenizerConfig>(&buffer)
.map_err(|err| LinderaErrorKind::Io.with_error(anyhow::anyhow!(err)))?;

Ok(tokenizer_config)
match serde_yaml::from_slice::<serde_yaml::Value>(&buffer) {
Ok(value) => {
// Check if the value is a mapping.
match value {
serde_yaml::Value::Mapping(_) => {
Ok(serde_json::to_value(value).map_err(|err| {
LinderaErrorKind::Deserialize.with_error(anyhow::anyhow!(err))
})?)
}
_ => Err(LinderaErrorKind::Deserialize.with_error(anyhow::anyhow!("Invalid YAML"))),
}
}
Err(err) => Err(LinderaErrorKind::Deserialize.with_error(err)),
}
}

/// Returns the default configuration as a `serde_json::Value`.
Expand All @@ -46,26 +56,27 @@ fn ensure_keys(mut config: Value) -> Value {
if config.get("segmenter").is_none() {
config["segmenter"] = json!({});
}

if config.get("character_filters").is_none() {
config["character_filters"] = json!([]);
}

if config.get("token_filters").is_none() {
config["token_filters"] = json!([]);
}

config
}

/// Builder that carries the configuration used to construct a tokenizer.
///
/// The visible constructors are `new`, which loads the file named by the
/// `LINDERA_CONFIG_PATH` environment variable when it is set and otherwise
/// starts from an empty configuration, and `from_file`, which loads a YAML
/// configuration file from an explicit path.
#[derive(Debug)]
pub struct TokenizerBuilder {
// Configuration being assembled. `from_file` passes it through `ensure_keys`,
// which takes a JSON `Value` — presumably `TokenizerConfig` is an alias for
// `serde_json::Value`; confirm against its definition.
config: TokenizerConfig,
}

impl TokenizerBuilder {
pub fn new() -> LinderaResult<Self> {
if let Ok(config_path) = env::var("LINDERA_CONFIG_PATH") {
Self::from_file(Path::new(&config_path)).map_err(|e| {
LinderaErrorKind::Parse
.with_error(anyhow::anyhow!("failed to load config file: {}", e))
})
Self::from_file(Path::new(&config_path))
} else {
Ok(Self {
config: empty_config(),
Expand All @@ -74,7 +85,9 @@ impl TokenizerBuilder {
}

pub fn from_file(file_path: &Path) -> LinderaResult<Self> {
let config = yaml_to_config(file_path).unwrap_or_else(|_| empty_config());
let config = yaml_to_config(file_path)?;

println!("config: {:?}", config);

Ok(TokenizerBuilder {
config: ensure_keys(config),
Expand Down Expand Up @@ -696,4 +709,48 @@ mod tests {
}
}
}

/// Building from a path that does not exist must fail, and unwrapping that
/// failure must panic with the platform's "file not found" text (non-Windows
/// wording).
#[test]
#[cfg(not(windows))]
#[should_panic(expected = "No such file or directory")]
fn test_create_tokenizer_builder_from_non_existent_file() {
    use std::path::PathBuf;

    use crate::tokenizer::TokenizerBuilder;

    // Assemble `<manifest dir>/../resources/non_existent_file.yml` step by step.
    let mut missing_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    missing_path.push("../resources");
    missing_path.push("non_existent_file.yml");

    // The load is expected to return an error; unwrapping it triggers the panic
    // that `should_panic` checks.
    let outcome = TokenizerBuilder::from_file(missing_path.as_path());
    outcome.unwrap();
}

/// Windows counterpart of the missing-file test: the panic message must carry
/// the Windows wording for a file that cannot be found.
#[test]
#[cfg(windows)]
#[should_panic(expected = "The system cannot find the file specified.")]
fn test_create_tokenizer_builder_from_non_existent_file() {
    use std::path::PathBuf;

    use crate::tokenizer::TokenizerBuilder;

    // Assemble `<manifest dir>/../resources/non_existent_file.yml` step by step.
    let mut missing_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    missing_path.push("../resources");
    missing_path.push("non_existent_file.yml");

    // The load is expected to return an error; unwrapping it triggers the panic
    // that `should_panic` checks.
    let outcome = TokenizerBuilder::from_file(missing_path.as_path());
    outcome.unwrap();
}

/// Loading `resources/invalid.yml` must fail, and unwrapping the failure must
/// panic with a message containing "Invalid YAML".
#[test]
#[should_panic(expected = "Invalid YAML")]
fn test_create_tokenizer_builder_from_invalid_file() {
    use std::path::PathBuf;

    use crate::tokenizer::TokenizerBuilder;

    // Assemble `<manifest dir>/../resources/invalid.yml` step by step.
    let mut invalid_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    invalid_path.push("../resources");
    invalid_path.push("invalid.yml");

    // Unwrapping the expected error produces the panic that `should_panic` checks.
    let outcome = TokenizerBuilder::from_file(invalid_path.as_path());
    outcome.unwrap();
}
}
1 change: 1 addition & 0 deletions resources/invalid.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Invalid configuration

0 comments on commit 0a92b32

Please sign in to comment.