Enterprise Case 2: CSV Import CLI, Unbounded Header

This post is part of the Input Coverage > Code Coverage series.

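A common pattern: a CLI reads CSV from stdin, and the first line is a header of the form N=<rows>. The library path caps the row count, but the CLI path trusts N and calls Vec::with_capacity(N) with it, so a sufficiently large N makes the process abort. Property tests and libFuzzer exercise only the safe library path, so they never reach the bug.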

src/lib.rs (the CLI path is left unbounded on purpose):

use thiserror::Error;

/// One parsed CSV record, built by `parse_row` from a line with exactly
/// three comma-separated fields.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Row {
    /// First field of the line, parsed as an unsigned 64-bit integer.
    pub id: u64,
    /// Second field of the line, stored as an owned copy of the field text.
    pub name: String,
    /// Third field of the line, parsed as a signed 64-bit integer.
    /// NOTE(review): presumably a monetary amount in cents — confirm.
    pub cents: i64,
}

/// Errors reported by the row-parsing and ingestion functions in this file.
#[derive(Debug, Error, PartialEq, Eq)]
pub enum IngestError {
    /// Returned by `ingest_rows_safe` when another data line is encountered
    /// after `max_rows` rows have already been collected.
    #[error("too many rows")]
    TooManyRows,
    /// Returned by `parse_row` when a line does not have exactly three
    /// comma-separated fields, or when its first or third field fails to
    /// parse as an integer.
    #[error("invalid row")]
    InvalidRow,
}

/// Parses one CSV line of the form `id,name,cents` into a [`Row`].
///
/// Trailing whitespace (including a line terminator) is stripped from the
/// line before it is split on `,`. The `name` field is copied as-is.
///
/// # Errors
///
/// Returns [`IngestError::InvalidRow`] when the line does not contain exactly
/// three comma-separated fields, when `id` is not a valid `u64`, or when
/// `cents` is not a valid `i64`.
pub fn parse_row(line: &str) -> Result<Row, IngestError> {
    let mut fields = line.trim_end().split(',');

    // Pull three fields and require the iterator to be exhausted afterwards,
    // i.e. the line must have exactly three fields.
    let (id_text, name_text, cents_text) =
        match (fields.next(), fields.next(), fields.next(), fields.next()) {
            (Some(first), Some(second), Some(third), None) => (first, second, third),
            _ => return Err(IngestError::InvalidRow),
        };

    let id = id_text.parse::<u64>().map_err(|_| IngestError::InvalidRow)?;
    let cents = cents_text.parse::<i64>().map_err(|_| IngestError::InvalidRow)?;

    Ok(Row {
        id,
        name: name_text.to_string(),
        cents,
    })
}

/// Parses every data line of `input` into a [`Row`], accepting at most
/// `max_rows` rows.
///
/// Lines that start with `N=` (on any line, not just the first) and lines
/// that are empty or whitespace-only are skipped. The `N=` value itself is
/// never read, so it has no influence on allocation.
///
/// # Errors
///
/// * [`IngestError::TooManyRows`] — a data line is reached after `max_rows`
///   rows were already collected. The limit is checked before the line is
///   parsed, so it takes precedence over a malformed row.
/// * [`IngestError::InvalidRow`] — propagated from `parse_row`.
pub fn ingest_rows_safe(input: &str, max_rows: usize)
    -> Result<Vec<Row>, IngestError>
{
    let data_lines = input
        .lines()
        .filter(|text| !text.starts_with("N=") && !text.trim().is_empty());

    let mut rows = Vec::new();
    for data_line in data_lines {
        if rows.len() >= max_rows {
            return Err(IngestError::TooManyRows);
        }
        let row = parse_row(data_line)?;
        rows.push(row);
    }
    Ok(rows)
}

/// Parses `input` into rows, pre-allocating from the row count declared in
/// the header. This is the deliberately vulnerable path.
///
/// The first line is always consumed as the header, even when it does not
/// start with `N=`; in that case, or when the value after `N=` is not a valid
/// `usize`, the declared count falls back to 0. Every remaining line that is
/// not blank is passed to `parse_row`; lines that fail to parse are silently
/// dropped rather than reported.
///
/// The declared count is used only as the initial capacity. It does not
/// limit how many rows are returned.
pub fn ingest_rows_unbounded(input: &str) -> Vec<Row> {
    let mut remaining = input.lines();

    let declared_rows = remaining
        .next()
        .and_then(|header| header.strip_prefix("N="))
        .and_then(|digits| digits.parse::<usize>().ok())
        .unwrap_or(0);

    // Bug (kept on purpose): the header value is trusted as-is, with no
    // upper bound, so the input alone decides how much is pre-allocated.
    let mut rows = Vec::with_capacity(declared_rows);
    rows.extend(
        remaining
            .filter(|text| !text.trim().is_empty())
            .filter_map(|text| parse_row(text).ok()),
    );
    rows
}

src/bin/import.rs:

use std::io::Read;

/// CLI entry point: reads all of stdin as text and feeds it to the
/// unbounded ingestion path of the library.
fn main() {
    // Read the whole of stdin before parsing; a read failure panics via
    // `unwrap`.
    let mut stdin_text = String::new();
    std::io::stdin().read_to_string(&mut stdin_text).unwrap();

    let parsed = csv_import_cli::ingest_rows_unbounded(&stdin_text);

    // NOTE(review): presumably this check exists only so that the parsed
    // rows are observably used — confirm.
    if parsed.len() == usize::MAX {
        eprintln!("impossible.");
    }
}

Next: Fuzzing The CSV CLI With AFL++