在 Debian 上使用 Rust 做数据分析的实用路线
一、环境与工具链
二、命令行优先的数据处理
三、程序化分析的最小示例(Rust + CSV + Serde)
# Cargo.toml manifest for the minimal CSV analysis example.
[package]
name = "csv-analysis"
version = "0.1.0"
edition = "2021"
[dependencies]
# CSV reader/writer used by the example program.
csv = "1.1"
# The "derive" feature provides #[derive(Deserialize)] for the record struct.
serde = { version = "1.0", features = ["derive"] }
use csv::{ReaderBuilder, WriterBuilder};
use serde::Deserialize;
use std::collections::HashMap;
use std::error::Error;
/// One row of the input CSV, deserialized via serde.
///
/// NOTE(review): the field names are presumably matched against the CSV
/// header columns of the same name; confirm against the input file.
#[derive(Debug, Deserialize)]
struct Record {
/// Country name; used as the grouping key for the aggregation.
country: String,
/// City name; deserialized but not used by the aggregation in this file.
city: String,
/// City population; rows where this is `None` are skipped by `main`.
population: Option<u64>,
}
/// Reads `data.csv`, keeps rows whose population exceeds 100,000, and writes
/// per-country statistics (`country`, `total`, `mean`, `median`) as CSV to
/// standard output.
///
/// Rows without a population value are ignored. Output rows are sorted by
/// country name so repeated runs over the same input produce identical output.
/// The median is the middle value for an odd number of cities and the average
/// of the two middle values for an even number, printed with one decimal place.
///
/// # Errors
///
/// Returns an error if `data.csv` cannot be opened, if a row cannot be
/// deserialized into a [`Record`], or if writing to / flushing standard output
/// fails.
fn main() -> Result<(), Box<dyn Error>> {
    let mut rdr = ReaderBuilder::new()
        .has_headers(true)
        .from_path("data.csv")?;

    // 1) Read and filter: group populations above the threshold by country.
    let mut agg: HashMap<String, Vec<u64>> = HashMap::new();
    for result in rdr.deserialize() {
        let rec: Record = result?;
        if let Some(pop) = rec.population.filter(|&p| p > 100_000) {
            agg.entry(rec.country).or_default().push(pop);
        }
    }

    // `HashMap` iteration order is unspecified; sort by country so the
    // report is reproducible.
    let mut rows: Vec<(String, Vec<u64>)> = agg.into_iter().collect();
    rows.sort_unstable_by(|a, b| a.0.cmp(&b.0));

    // 2) Aggregate: per-country population list -> total, mean, median.
    let mut wtr = WriterBuilder::new().has_headers(true).from_writer(std::io::stdout());
    wtr.write_record(&["country", "total", "mean", "median"])?;
    for (country, mut pops) in rows {
        let total: u64 = pops.iter().sum();
        let mean: f64 = total as f64 / pops.len() as f64;

        // The list is owned here, so sort it in place instead of cloning.
        pops.sort_unstable();
        let mid = pops.len() / 2;
        // Every entry holds at least one value (entries are only created on
        // push), so `mid` is in bounds and `mid - 1` is valid when the length
        // is even (then len >= 2).
        let median: f64 = if pops.len() % 2 == 0 {
            (pops[mid - 1] as f64 + pops[mid] as f64) / 2.0
        } else {
            pops[mid] as f64
        };

        wtr.write_record(&[
            country,
            total.to_string(),
            format!("{:.2}", mean),
            format!("{:.1}", median),
        ])?;
    }

    // Flush explicitly so write errors surface instead of being lost on drop.
    wtr.flush()?;
    Ok(())
}
# Build an optimized binary.
cargo build --release
# Run it; the program writes CSV to stdout, redirected here into out.csv.
./target/release/csv-analysis > out.csv
四、性能对比与瓶颈定位
# Benchmark setup to add to Cargo.toml.
[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }
[[bench]]
# Name of the benchmark target.
# NOTE(review): presumably corresponds to benches/my_benchmarks.rs; confirm.
name = "my_benchmarks"
# Disable the default bench harness so Criterion can supply its own entry point.
harness = false
五、与 R 或 Python 的协作与扩展