Fix multi-language mnemonic restore: Unicode case folding and UTF-8 prefix handling
This commit is contained in:
@@ -54,9 +54,18 @@ fn crc32(data: &str) -> u32 {
|
||||
crc ^ 0xFFFF_FFFF
|
||||
}
|
||||
|
||||
/// Find a word in a word list. Returns the index or None.
|
||||
/// Return the first `n` characters of a UTF-8 string (or the whole string if shorter).
///
/// `n` counts Unicode scalar values (`char`s), not bytes, so the returned
/// slice always ends on a character boundary and slicing can never panic on a
/// multi-byte character such as `ü`.
fn utf8_prefix(s: &str, n: usize) -> &str {
    // `char_indices().nth(n)` yields the byte offset at which the (n+1)-th
    // character starts; everything before that offset is the n-char prefix.
    match s.char_indices().nth(n) {
        Some((byte_pos, _)) => &s[..byte_pos],
        // Fewer than n+1 characters: the whole string is the prefix.
        None => s,
    }
}
|
||||
|
||||
/// Find a word in a word list (case-insensitive, Unicode-aware). Returns the index or None.
|
||||
fn find_word(word_list: &WordList, word: &str) -> Option<usize> {
|
||||
word_list.words.iter().position(|&w| w == word)
|
||||
let lower = word.to_lowercase();
|
||||
word_list.words.iter().position(|&w| w.to_lowercase() == lower)
|
||||
}
|
||||
|
||||
/// Detect language from a mnemonic phrase.
|
||||
@@ -131,14 +140,18 @@ pub fn mnemonic_to_seed(
|
||||
indices.push(idx as u32);
|
||||
}
|
||||
|
||||
// Use canonical wordlist entries for checksum (preserves original case,
|
||||
// e.g. German nouns are capitalized: "Augapfel" not "augapfel").
|
||||
let canonical: Vec<&str> = indices.iter().map(|&i| word_list.words[i as usize]).collect();
|
||||
|
||||
// Verify checksum
|
||||
let prefix_len = word_list.prefix_length;
|
||||
let checksum_data: String =
|
||||
words[..24].iter().map(|w| &w[..w.len().min(prefix_len)]).collect::<Vec<_>>().join("");
|
||||
canonical[..24].iter().map(|w| utf8_prefix(w, prefix_len)).collect::<Vec<_>>().join("");
|
||||
let checksum_index = (crc32(&checksum_data) % 24) as usize;
|
||||
|
||||
let expected_prefix = &words[checksum_index][..words[checksum_index].len().min(prefix_len)];
|
||||
let actual_prefix = &words[24][..words[24].len().min(prefix_len)];
|
||||
let expected_prefix = utf8_prefix(canonical[checksum_index], prefix_len);
|
||||
let actual_prefix = utf8_prefix(canonical[24], prefix_len);
|
||||
|
||||
if expected_prefix != actual_prefix {
|
||||
return Err(MnemonicError::ChecksumMismatch {
|
||||
@@ -203,7 +216,7 @@ pub fn seed_to_mnemonic(seed: &[u8; 32], language: Option<&str>) -> Result<Strin
|
||||
// Calculate checksum word
|
||||
let prefix_len = word_list.prefix_length;
|
||||
let checksum_data: String =
|
||||
words.iter().map(|w| &w[..w.len().min(prefix_len)]).collect::<Vec<_>>().join("");
|
||||
words.iter().map(|w| utf8_prefix(w, prefix_len)).collect::<Vec<_>>().join("");
|
||||
let checksum_index = (crc32(&checksum_data) % 24) as usize;
|
||||
words.push(words[checksum_index]);
|
||||
|
||||
@@ -251,12 +264,96 @@ mod tests {
|
||||
assert_eq!(result.seed, seed, "roundtrip should preserve seed");
|
||||
}
|
||||
|
||||
/// Encodes a fixed seed with the German word list and checks that it decodes
/// back to the same seed with an explicit language, with auto-detection, and
/// from an all-lowercase copy of the mnemonic.
#[test]
fn test_german_roundtrip() {
    let seed = [
        0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xF0, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66,
        0x77, 0x88, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55,
        0x66, 0x77, 0x88, 0x99,
    ];

    // Encode as German (mixed-case wordlist)
    let mnemonic = seed_to_mnemonic(&seed, Some("german")).unwrap();

    // Decode with explicit language
    let result = mnemonic_to_seed(&mnemonic, Some("german")).unwrap();
    assert_eq!(result.seed, seed, "German roundtrip should preserve seed");

    // Decode with auto-detection
    let result2 = mnemonic_to_seed(&mnemonic, None).unwrap();
    assert_eq!(result2.seed, seed, "German auto-detect roundtrip should preserve seed");

    // Decode with user-lowercased input (the actual bug scenario)
    let lowered = mnemonic.to_lowercase();
    let result3 = mnemonic_to_seed(&lowered, None).unwrap();
    assert_eq!(result3.seed, seed, "lowercase German input should still decode correctly");
}
|
||||
|
||||
/// For every language reported by `available_languages()`, encodes a fixed
/// seed and checks that it decodes back to the same seed from the original
/// mnemonic, from a lowercased copy with the language given, and from a
/// lowercased copy with auto-detection.
#[test]
fn test_all_languages_roundtrip() {
    let seed = [
        0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xF0, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66,
        0x77, 0x88, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55,
        0x66, 0x77, 0x88, 0x99,
    ];

    for lang in available_languages() {
        // Include the language name in every failure so the offending list is obvious.
        let mnemonic =
            seed_to_mnemonic(&seed, Some(lang)).unwrap_or_else(|e| panic!("{lang}: {e}"));

        // Original-case roundtrip
        let result = mnemonic_to_seed(&mnemonic, Some(lang))
            .unwrap_or_else(|e| panic!("{lang} decode: {e}"));
        assert_eq!(result.seed, seed, "{lang}: roundtrip failed");

        // Lowercased input (user may type all lowercase)
        let lowered = mnemonic.to_lowercase();
        let result2 = mnemonic_to_seed(&lowered, Some(lang))
            .unwrap_or_else(|e| panic!("{lang} lowercase decode: {e}"));
        assert_eq!(result2.seed, seed, "{lang}: lowercase roundtrip failed");

        // Auto-detect from lowercased input
        let result3 = mnemonic_to_seed(&lowered, None)
            .unwrap_or_else(|e| panic!("{lang} auto-detect: {e}"));
        assert_eq!(result3.seed, seed, "{lang}: auto-detect roundtrip failed");
    }
}
|
||||
|
||||
/// A three-word phrase must be rejected with `WrongWordCount` carrying the
/// number of words that were supplied.
#[test]
fn test_wrong_word_count() {
    let result = mnemonic_to_seed("one two three", Some("english"));
    assert!(matches!(result, Err(MnemonicError::WrongWordCount(3))));
}
|
||||
|
||||
/// Cross-implementation test using the exact German seed from the C++ test suite
/// (`tests/unit_tests/mnemonics.cpp` — `case_tolerance` test).
/// Verifies that mixed-case, all-lowercase, and auto-detected decoding all
/// produce the same seed bytes.
#[test]
fn test_cpp_german_case_tolerance() {
    // Exact seed from C++ test: mixed case with umlaut (Grünalge)
    let seed_mixed = "Neubau umarmen Abart umarmen Turban feilen Brett Bargeld \
        Episode Milchkuh Substanz Jahr Armband Maibaum Tand Grünalge Tabak \
        erziehen Federboa Lobrede Tenor Leuchter Curry Diskurs Tenor";

    // Decode mixed-case (as originally generated)
    let result1 = mnemonic_to_seed(seed_mixed, None).expect("mixed-case German decode");
    assert_eq!(result1.language.english_name, "german");

    // All-lowercase (what C++ boost::algorithm::to_lower produces)
    let seed_lower = seed_mixed.to_lowercase();
    let result2 = mnemonic_to_seed(&seed_lower, None).expect("lowercase German decode");
    assert_eq!(result2.language.english_name, "german");

    // Both must produce identical seed bytes
    assert_eq!(result1.seed, result2.seed, "case should not affect seed derivation");

    // Re-encode from the derived seed and verify roundtrip
    let re_encoded = seed_to_mnemonic(&result1.seed, Some("german")).unwrap();
    let result3 = mnemonic_to_seed(&re_encoded, Some("german")).unwrap();
    assert_eq!(result3.seed, result1.seed, "re-encoded German roundtrip failed");
}
|
||||
|
||||
#[test]
|
||||
fn test_available_languages() {
|
||||
let langs = available_languages();
|
||||
|
||||
Reference in New Issue
Block a user