moodmosaic

Enterprise Case 1: Money Parser, Unicode Minus

This post is part of the Input Coverage > Code Coverage series.

Parsing money is simple until it is not. Users paste from Excel and email. Unicode minus (U+2212) shows up. The ASCII hyphen-minus (U+002D) is not the only minus.

Bug: parser treats only ASCII - as negative. Invariant breaks: a string containing a minus yields a positive value.

money-amounts/src/lib.rs (buggy on purpose):

use thiserror::Error;

/// Errors returned by `parse_money`.
#[derive(Debug, Error, PartialEq, Eq)]
pub enum MoneyError {
    /// The input is not a recognisable amount: a character that is not an
    /// ASCII digit remained in the integer or fraction part, or the value
    /// does not fit in an `i64` number of cents.
    #[error("invalid format")]
    Invalid,
    /// The text after the decimal separator is longer than two bytes.
    #[error("too many decimals")]
    TooManyDecimals,
}

/// Formats an amount in cents as a decimal string with exactly two fraction digits.
///
/// Negative amounts get a leading ASCII `-`, e.g. `-5` becomes `"-0.05"` and
/// `12345` becomes `"123.45"`.
///
/// The magnitude is computed with `unsigned_abs`, so `i64::MIN` is formatted
/// correctly instead of overflowing.
pub fn format_cents(cents: i64) -> String {
    let sign = if cents < 0 { "-" } else { "" };
    // `i64::abs` overflows for `i64::MIN`; the unsigned magnitude always fits in a `u64`.
    let abs = cents.unsigned_abs();
    let whole = abs / 100;
    let frac = abs % 100;
    format!("{sign}{whole}.{frac:02}")
}

/// Parses a human-entered money string into a signed number of cents.
///
/// Steps, in the order they are applied:
/// - surrounding whitespace is trimmed;
/// - a value wrapped in `(` ... `)` is negative;
/// - `$`, `€`, `£` and ASCII spaces are dropped wherever they occur;
/// - one leading ASCII `-` marks the value as negative;
/// - if the last `.` comes after every `,` it is the decimal separator and
///   commas before it are grouping; with no `.` at all, the last `,` is the
///   decimal separator; a `,` after the last `.` leaves the whole text as the
///   integer part, which is then rejected;
/// - at most two fraction digits are allowed; a single digit counts as tenths.
///
/// Known bug (kept on purpose): only ASCII `-` is recognised as a sign.
/// U+2212 (Unicode minus) is never stripped and never sets the negative flag.
///
/// # Errors
///
/// - `MoneyError::TooManyDecimals` if the text after the decimal separator is
///   longer than two bytes.
/// - `MoneyError::Invalid` if any other character than an ASCII digit remains
///   in the integer or fraction part, or the amount overflows `i64` cents.
pub fn parse_money(s: &str) -> Result<i64, MoneyError> {
    let trimmed = s.trim();

    // Accounting notation: "(12.50)" means -12.50.
    let (inner, in_parens) = match trimmed
        .strip_prefix('(')
        .and_then(|rest| rest.strip_suffix(')'))
    {
        Some(body) => (body, true),
        None => (trimmed, false),
    };

    // Currency symbols and plain spaces are ignored at any position.
    let stripped: String = inner
        .chars()
        .filter(|&ch| !matches!(ch, '$' | '€' | '£' | ' '))
        .collect();

    let (body, negative) = match stripped.strip_prefix('-') {
        Some(rest) => (rest, true),
        None => (stripped.as_str(), in_parens),
    };

    // `grouping` is the separator to ignore inside the integer part, if any.
    let (whole, frac, grouping) = match (body.rfind('.'), body.rfind(',')) {
        (Some(d), Some(c)) if d > c => (&body[..d], &body[d + 1..], Some(',')),
        (Some(d), None) => (&body[..d], &body[d + 1..], None),
        // No '.' exists on this path, so there is nothing else to strip.
        (None, Some(c)) => (&body[..c], &body[c + 1..], None),
        _ => (body, "", None),
    };

    // Checked before digit validation so an over-long fraction wins over `Invalid`.
    if frac.len() > 2 {
        return Err(MoneyError::TooManyDecimals);
    }

    let digits = whole
        .chars()
        .filter(|&ch| grouping != Some(ch))
        .map(|ch| {
            if ch.is_ascii_digit() {
                Ok(ch)
            } else {
                Err(MoneyError::Invalid)
            }
        })
        .collect::<Result<String, MoneyError>>()?;

    let units: i64 = if digits.is_empty() {
        0
    } else {
        // Only ASCII digits remain, so the only possible failure is overflow.
        digits.parse().map_err(|_| MoneyError::Invalid)?
    };

    // `frac` is at most two bytes here; one digit means tenths ("1.5" == "1.50").
    let hundredths: i64 = match frac.as_bytes() {
        [] => 0,
        [d] if d.is_ascii_digit() => i64::from(d - b'0') * 10,
        [hi, lo] if hi.is_ascii_digit() && lo.is_ascii_digit() => {
            i64::from(hi - b'0') * 10 + i64::from(lo - b'0')
        }
        _ => return Err(MoneyError::Invalid),
    };

    let magnitude = units
        .checked_mul(100)
        .and_then(|v| v.checked_add(hundredths))
        .ok_or(MoneyError::Invalid)?;

    if negative {
        Ok(-magnitude)
    } else {
        Ok(magnitude)
    }
}

/// Returns `true` if `s` contains an ASCII hyphen-minus (`-`) or the Unicode
/// minus sign (U+2212) anywhere in the string.
pub fn contains_any_minus(s: &str) -> bool {
    s.chars().any(|ch| matches!(ch, '-' | '\u{2212}'))
}

Next: Fuzzing The Money Parser With libFuzzer