Store

The Store provides the query interface for morphological data.

Opening a Store

The Store is usually accessed through a Repository:

#![allow(unused)]
fn main() {
use unimorph_core::Repository;

// Open a repository with `open_default()` (fallible, hence the `?`), then
// obtain the store from it.
// NOTE(review): presumably `store` borrows from `repo`, so keep `repo` bound
// for as long as `store` is in use — confirm against the API.
let repo = Repository::open_default()?;
let store = repo.store();
}

Or open directly:

#![allow(unused)]
fn main() {
use unimorph_core::Store;

// Two alternative constructors are shown; both are fallible (`?`). The second
// `let store` shadows the first, so in real code pick one of them.

// Open existing database
let store = Store::open("path/to/datasets.db")?;

// In-memory store (for testing)
let store = Store::in_memory()?;
}

Basic Queries

Inflect (Lemma to Forms)

Look up all inflected forms of a lemma:

#![allow(unused)]
fn main() {
// Fetch every entry recorded for the lemma "כתב" in the "heb" dataset.
let inflected = store.inflect("heb", "כתב")?;
let total = inflected.len();

// One `lemma -> form (features)` line per entry.
for item in &inflected {
    println!("{} -> {} ({})", item.lemma, item.form, item.features);
}

println!("Found {} forms", total);
}

Analyze (Form to Lemmas)

Find all possible lemmas for a surface form:

#![allow(unused)]
fn main() {
// Reverse lookup: collect every entry whose surface form is "כתבו".
let candidates = store.analyze("heb", "כתבו")?;

// One `form <- lemma (features)` line per candidate reading.
for reading in &candidates {
    println!("{} <- {} ({})", reading.form, reading.lemma, reading.features);
}

// Handle ambiguous forms: more than one reading means the form is ambiguous.
let reading_count = candidates.len();
if reading_count > 1 {
    println!("Ambiguous: {} possible analyses", reading_count);
}
}

Statistics

Get dataset statistics:

#![allow(unused)]
fn main() {
// `stats` is fallible and yields an `Option`; nothing is printed on `None`.
let maybe_summary = store.stats("heb")?;

if let Some(summary) = maybe_summary {
    println!("Entries: {}", summary.total_entries);
    println!("Lemmas: {}", summary.unique_lemmas);
    println!("Forms: {}", summary.unique_forms);
}
}

Check Language

Check if a language is loaded:

#![allow(unused)]
fn main() {
// `has_language` is fallible, so resolve the `Result` before branching.
let hebrew_loaded = store.has_language("heb")?;
if hebrew_loaded {
    println!("Hebrew is available");
}

// List all languages
for code in store.languages()? {
    println!("- {}", code);
}
}

Query Builder

For flexible searching, use the query builder:

#![allow(unused)]
fn main() {
// Start a query for the "heb" dataset, chain the desired filters, then run it
// with `execute()` (fallible, hence the `?`).
// NOTE(review): presumably all filters must match (AND semantics) and every
// filter is optional — confirm against the Query Builder documentation.
let results = store.query("heb")
    .lemma("כת%")           // LIKE pattern (% = any chars)
    .form("%ים")            // Forms ending in ים
    .pos("V")               // Part of speech
    .features_match("V;*;SG;*")  // Pattern match
    .features_contain(&["FUT"])  // Contains feature
    .limit(100)
    .offset(0)
    .execute()?;
}

See Query Builder for full documentation.

Data Management

Import Data

Import entries from TSV format:

#![allow(unused)]
fn main() {
use unimorph_core::{Entry, LangCode};

let lang: LangCode = "test".parse()?;
let entries = vec![
    Entry::parse_line("test\tform1\tN;SG", 1)?,
    Entry::parse_line("test\tform2\tN;PL", 2)?,
];

store.import(&lang, &entries, None, None)?;
}

Delete Language

Remove a language from the store:

#![allow(unused)]
fn main() {
// Delete the "heb" dataset and report the count returned by the call.
let deleted_count = store.delete_language("heb")?;
println!("Removed {} entries", deleted_count);
}

Export

Export to various formats:

#![allow(unused)]
fn main() {
use std::io::stdout;

// Every export call is fallible and returns a count.
// NOTE(review): presumably the number of entries written — confirm.

// Export to TSV file
let tsv_count = store.export_tsv("heb", "hebrew.tsv")?;

// Export to JSONL file
let jsonl_count = store.export_jsonl("heb", "hebrew.jsonl")?;

// Export to writer (e.g., stdout)
let streamed_count = store.export_tsv_to_writer("heb", stdout().lock())?;

// Parquet (with feature flag)
#[cfg(feature = "parquet")]
let parquet_count = store.export_parquet("heb", "hebrew.parquet")?;
}

Thread Safety

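Store is Send but not Sync: a single instance can be moved to another thread, but it cannot be shared by reference between threads. For concurrent access, either wrap the store in a Mutex (shared through an Arc or scoped threads so that several threads can reach it) or create a separate Store instance per thread: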

#![allow(unused)]
fn main() {
use std::sync::Mutex;

// Wrap the store in a Mutex so that every access has to take the lock first.
let store = Mutex::new(Store::open("datasets.db")?);

// In threads:
// NOTE(review): this snippet is schematic — the guard below shadows the Mutex
// in the same scope. To reach the Mutex from spawned threads it must also be
// shared, e.g. through `std::sync::Arc` or `std::thread::scope`.
// `lock()` blocks until the mutex is free; `unwrap()` panics if the mutex was
// poisoned by a thread that panicked while holding it.
let store = store.lock().unwrap();
let results = store.inflect("heb", "כתב")?;
}

Error Handling

#![allow(unused)]
fn main() {
use unimorph_core::{Store, Error};

// Tell the "language not found" failure apart from every other error: the
// former is reported to the user, anything else is propagated to the caller.
let outcome = store.inflect("xyz", "test");
match outcome {
    Err(Error::LanguageNotFound(code)) => {
        println!("Language {} not downloaded", code);
    }
    Err(other) => return Err(other.into()),
    Ok(found) => println!("Found {} entries", found.len()),
}
}