在 Debian 上使用 Rust 进行数据分析
一 环境准备
安装 Rust 工具链:
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
source $HOME/.cargo/env
验证安装:rustc --version、cargo --version
(可选)配置 rustup 国内镜像:
export RUSTUP_DIST_SERVER=https://mirrors.ustc.edu.cn/rust-static
export RUSTUP_UPDATE_ROOT=https://mirrors.ustc.edu.cn/rust-static/rustup
(可选)配置 crates.io 镜像(写入 $HOME/.cargo/config):
方案一(USTC 镜像):
[source.crates-io]
registry = "https://github.com/rust-lang/crates.io-index"
replace-with = 'ustc'
[source.ustc]
registry = "git://mirrors.ustc.edu.cn/crates.io-index"
方案二(rsproxy 镜像,与方案一二选一):
[source.crates-io]
replace-with = 'rsproxy'
[source.rsproxy]
registry = "https://rsproxy.cn/crates.io-index"
[registries.rsproxy]
index = "https://rsproxy.cn/crates.io-index"
[net]
git-fetch-with-cli = true
安装构建与调试工具:sudo apt update && sudo apt install -y build-essential gdb。
二 常用数据分析库与场景
三 快速上手示例 读取 CSV 并计算均值
创建项目:cargo new rust-data-demo && cd rust-data-demo
编辑 Cargo.toml:
[package]
name = "rust-data-demo"
version = "0.1.0"
edition = "2021"
[dependencies]
polars = "0.37"
创建示例数据文件 data.csv(程序以相对路径 "data.csv" 读取,需放在运行 cargo run 的目录下):
name,age,salary
Alice,25,52000
Bob,30,64000
Charlie,35,80000
use polars::prelude::*;
use std::error::Error;
fn main() -> Result<(), Box<dyn Error>> {
// 读取 CSV
let df = CsvReader::from_path("data.csv")?
.has_header(true)
.finish()?;
// 计算平均年龄与平均薪资
let mean_age: f64 = df.column("age")?.f64()?.mean()?;
let mean_salary: f64 = df.column("salary")?.f64()?.mean()?;
println!("Mean age: {:.2}", mean_age);
println!("Mean salary: {:.2}", mean_salary);
Ok(())
}
运行:cargo run --release(--release 启用编译优化以提升性能)。
四 进阶 性能分析与结果可视化
安装 gnuplot:sudo apt-get install -y gnuplot
在 Cargo.toml 中添加:
[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }
[[bench]]
name = "benchmarks"
harness = false
use criterion::{criterion_group, criterion_main, Criterion, black_box};
/// Sums the slice with a hand-written accumulation loop.
///
/// Values are added left to right starting from `0.0`; an empty slice
/// yields `0.0`.
fn sum_slow(x: &[f64]) -> f64 {
    let mut total = 0.0;
    for value in x.iter().copied() {
        total += value;
    }
    total
}
/// Sums the slice using the standard library's iterator `sum`.
fn sum_fast(x: &[f64]) -> f64 {
    x.iter().copied().sum::<f64>()
}
/// Registers the "sum slow" and "sum fast" benchmarks on `c`.
///
/// Both benchmarks run over the same vector holding the values
/// `0.0, 1.0, ..., 9999.0`, passed through `black_box` on every iteration.
fn bench_sums(c: &mut Criterion) {
    let samples = (0..10_000).map(|i| i as f64).collect::<Vec<f64>>();

    c.bench_function("sum slow", |bencher| {
        bencher.iter(|| sum_slow(black_box(&samples)))
    });
    c.bench_function("sum fast", |bencher| {
        bencher.iter(|| sum_fast(black_box(&samples)))
    });
}
// Declare a benchmark group named `benches` that contains `bench_sums`.
criterion_group!(benches, bench_sums);
// Hand the group to Criterion's entry-point macro. The `[[bench]]` target
// sets `harness = false`, so the default test harness is not used here.
criterion_main!(benches);
运行基准测试:cargo bench(生成 HTML 报告,便于对比不同实现)
结果可视化:在 Cargo.toml 的 [dependencies] 中添加:
plotters = "0.3"
绘图代码:
use plotters::prelude::*;
/// Draws a histogram of `data` onto a 640x480 bitmap written to `path`.
///
/// The x axis spans the minimum to the maximum of `data` in buckets of
/// width 5.0; each value contributes a count of 1 to its bucket. The y axis
/// runs from 0 to `data.len() / 3 + 1`.
///
/// # Errors
///
/// Returns an error if `data` is empty, if `data.len()` does not fit in a
/// `u32`, or if any drawing step or the final write of the image fails.
fn plot_histogram(data: &[f64], path: &str) -> Result<(), Box<dyn std::error::Error>> {
    // With no data the min/max folds below would keep their seeds and produce
    // an inverted axis range, so reject the input before touching the file.
    if data.is_empty() {
        return Err("plot_histogram: `data` must not be empty".into());
    }

    let root = BitMapBackend::new(path, (640, 480)).into_drawing_area();
    root.fill(&WHITE)?;

    let max = data.iter().copied().fold(f64::MIN, f64::max);
    let min = data.iter().copied().fold(f64::MAX, f64::min);

    // Checked conversion instead of `as u32`, which would silently truncate.
    let y_upper = u32::try_from(data.len())? / 3 + 1;

    let mut chart = ChartBuilder::on(&root)
        .caption("Age Histogram", ("sans-serif", 20))
        .margin(10)
        .x_label_area_size(40)
        .y_label_area_size(40)
        .build_cartesian_2d((min..max).step(5.0), 0..y_upper)?;

    chart.configure_mesh().draw()?;
    chart.draw_series(
        Histogram::vertical(&chart)
            .style(BLUE.filled())
            .data(data.iter().map(|&x| (x, 1))),
    )?;

    // Flush explicitly so a failure to write the image is returned to the
    // caller rather than being dropped when the backend goes out of scope.
    root.present()?;
    Ok(())
}
// 在 main 末尾调用:
// plot_histogram(&[25.0, 30.0, 35.0], "age_hist.png")?;
运行 cargo run --release 生成图像文件。
五 工程化与部署
安装打包工具:cargo install cargo-deb
构建 .deb 包:cargo deb(产物位于 target/debian/*.deb)
安装:sudo dpkg -i target/debian/*.deb,若依赖缺失执行 sudo apt-get install -f
分离调试符号:cargo deb --separate-debug-symbols
交叉编译:先添加目标 rustup target add x86_64-unknown-linux-gnu(或如 armv7-unknown-linux-gnueabihf 等),再执行 cargo build --release --target x86_64-unknown-linux-gnu。