Types

Core data types in unimorph-core.

LangCode

A validated ISO 639-3 language code (3 lowercase ASCII letters).

#![allow(unused)]
fn main() {
use unimorph_core::LangCode;

// Parse from string
let lang: LangCode = "heb".parse()?;

// Validation happens at parse time
assert!("HEB".parse::<LangCode>().is_err());  // Must be lowercase
assert!("he".parse::<LangCode>().is_err());   // Must be 3 chars
assert!("h3b".parse::<LangCode>().is_err());  // Must be letters

// Convert to string
let s: &str = lang.as_ref();
let s: String = lang.to_string();
}

Entry

A single morphological entry with lemma, form, and features.

#![allow(unused)]
fn main() {
use unimorph_core::Entry;

// Entries are returned from queries
let entries = store.inflect("heb", "כתב")?;
for entry in entries {
    println!("Lemma: {}", entry.lemma);
    println!("Form: {}", entry.form);
    println!("Features: {}", entry.features);
    println!("Features (raw): {}", entry.features.raw());
    println!("Features (list): {:?}", entry.features.as_slice());
}

// Parse from TSV line
let entry = Entry::parse_line("כתב\tכתבתי\tV;1;SG;PST", 1)?;

// Serialize to JSON
let json = serde_json::to_string(&entry)?;
}

Fields

| Field | Type | Description |
|-------|------|-------------|
| `lemma` | `String` | Dictionary form |
| `form` | `String` | Inflected surface form |
| `features` | `FeatureBundle` | Morphological features |

FeatureBundle

A semicolon-separated bundle of morphological features.

#![allow(unused)]
fn main() {
use unimorph_core::FeatureBundle;

// Parse from string
let features: FeatureBundle = "V;1;SG;PST".parse()?;

// Access individual features
assert_eq!(features.as_slice(), &["V", "1", "SG", "PST"]);
assert_eq!(features.raw(), "V;1;SG;PST");
assert_eq!(features.len(), 4);

// Check if contains a feature (position-independent)
assert!(features.contains("PST"));
assert!(features.contains("V"));
assert!(!features.contains("FUT"));

// Check if contains all features
assert!(features.contains_all(&["V", "PST"]));

// Pattern matching with wildcards
assert!(features.matches("V;*;SG;*"));
assert!(features.matches("V;1;*;PST"));
assert!(!features.matches("N;*;*;*"));

// Display
println!("{}", features);  // "V;1;SG;PST"
}

Pattern Matching

The matches method supports positional pattern matching:

| Pattern | Description |
|---------|-------------|
| `V;1;SG;PST` | Exact match |
| `V;*;SG;*` | Wildcard at positions 1 and 3 (positions are zero-based) |
| `*;*;*;PST` | Only check position 3 |

Note: The pattern must have the same number of positions as the bundle.

Validation

  • Feature bundles cannot be empty
  • Individual features cannot be empty
  • Features are separated by semicolons
#![allow(unused)]
fn main() {
assert!("".parse::<FeatureBundle>().is_err());      // Empty
assert!("V;;SG".parse::<FeatureBundle>().is_err()); // Empty feature
}

DatasetStats

Statistics about a downloaded language dataset.

#![allow(unused)]
fn main() {
use unimorph_core::DatasetStats;

let stats = store.stats("heb")?;
if let Some(stats) = stats {
    println!("Total entries: {}", stats.total_entries);
    println!("Unique lemmas: {}", stats.unique_lemmas);
    println!("Unique forms: {}", stats.unique_forms);
    println!("Unique features: {}", stats.unique_features);
}
}

Fields

| Field | Type | Description |
|-------|------|-------------|
| `total_entries` | `usize` | Number of entries |
| `unique_lemmas` | `usize` | Distinct lemmas |
| `unique_forms` | `usize` | Distinct surface forms |
| `unique_features` | `usize` | Distinct feature bundles |

Serialization

All types implement Serialize and Deserialize from serde:

#![allow(unused)]
fn main() {
use unimorph_core::Entry;

let entry = store.inflect("heb", "כתב")?.first().unwrap();

// To JSON
let json = serde_json::to_string(&entry)?;

// From JSON
let entry: Entry = serde_json::from_str(&json)?;
}