当前位置：首页 > news >正文

一个网站要多大的空间网页制作基础教程视频教程葛艳玲

news 2025/12/31 22:35:17

一个网站要多大的空间,网页制作基础教程视频教程葛艳玲,河北省建设机械协会网站,深圳品牌设计公司深圳vi设计

Polars虽牛刀小试，就显博大精深，在数据分析上，未来有重要一席。下面主要列举一些常见用法。一、toml 需要说明的是，在Rust中，不少的功能都需要对应features引入设置，这些需要特别注意，否则编译…

Polars虽牛刀小试，就显博大精深，在数据分析上，未来有重要一席。下面主要列举一些常见用法。

一、toml

需要说明的是，在Rust中，不少的功能都需要对应features引入设置，这些需要特别注意，否则编译通不过。以下polars的版本是0.41.3。相关依赖项如下：

[dependencies]
polars = { version = "0.41.3", features = ["lazy", "dtype-struct", "polars-io", "dtype-datetime", "dtype-date", "range", "temporal", "rank", "serde", "csv", "ndarray", "parquet", "strings"] }
rand = "0.8.5"
chrono = "0.4.38"
serde_json = "1.0.124"
itertools = "0.13"

二、main.rs

部分函数功能还没有完成，用todo标示，请大家注意。

#![allow(warnings,dead_code, unused,unused_imports, unused_variables, unused_mut)] use polars::prelude::*; use std::time::Instant; use serde_json::*; use chrono::{NaiveDate}; #[allow(dead_code)] fn main(){//create_df_by_series();//create_df_by_df_macro();//df_apply();// 需要把相关函数放在里面即可这里不一一列示。//df_to_vec_tuples_by_izip();//write_read_parquet_files();//date_to_str_in_column();str_to_datetime_date_cast_in_df();//create_list_in_df_by_apply();//unnest_struct_in_df();//as_struct_in_df();//struct_apply_in_df();//test(); }fn create_df_by_series(){println!(------------- create_df_by_series test ---------------- );let s1 Series::new(from vec, vec![4, 3, 2]);let s2 Series::new(from slice, [true, false, true]);let s3 Series::new(from array, [rust, go, julia]);let df DataFrame::new(vec![s1, s2, s3]).unwrap();println!({:?}, df); }fn create_df_by_df_macro(){println!(------------- create_df_by_macro test ---------------- );let df1: DataFrame df!(D1 [1, 3, 1, 5, 6],D2 [3, 2, 3, 5, 3]).unwrap();let df2 df1.lazy().select([col(D1).count().alias(total),col(D1).filter(col(D1).gt(lit(2))).count().alias(D1 3),]).collect().unwrap();println!({}, df2); }fn rank(){println!(------------- rank test ---------------- );// 注意toml feature : ranklet mut df df!(scores [A, A, A, B, C, B],class [1, 2, 3, 4, 2, 2]).unwrap();let df df.clone().lazy().with_column(col(class).rank(RankOptions{method: RankMethod::Ordinal, descending: 
false}, None).over([col(scores)]).alias(rank_)).sort_by_exprs([col(scores), col(class), col(rank_)], Default::default());println!({:?}, df.collect().unwrap().head(Some(3))); }fn head_tail_sort(){println!(------------------head_tail_sort test-------------------);let df df!(scores [A, B, C, B, A, B],class [1, 3, 1, 1, 2, 3]).unwrap();let head df.head(Some(3));let tail df.tail(Some(3));// 对value列进行sort,生成新的series并进行排序let sort df.lazy().select([col(class).sort(Default::default())]).collect();println!(df head :{:?},head);println!(df tail:{:?},tail);println!(df sort:{:?},sort); }fn filter_group_by_agg(){println!(----------filter_group_by_agg test--------------);use rand::{thread_rng, Rng};let mut arr [0f64; 5];thread_rng().fill(mut arr);let df df! (nrs [Some(1), Some(2), Some(3), None, Some(5)],names [Some(foo), Some(ham), Some(spam), Some(eggs), None],random arr,groups [A, A, B, C, B],).unwrap();let df2 df.clone().lazy().filter(col(groups).eq(lit(A))).collect().unwrap();println!(df2 :{:?},df2);println!({}, df);let out df.lazy().group_by([col(groups)]).agg([sum(nrs), // sum nrs by groupscol(random).count().alias(count), // count group members// sum random where name ! 
nullcol(random).filter(col(names).is_not_null()).sum().name().suffix(_sum),col(names).reverse().alias(reversed names),]).collect().unwrap();println!({}, out);}fn filter_by_exclude(){println!(----------filter_by_exclude----------------------);let df df!(code [600036.SH.to_string(),600036.SH.to_string(),600036.SH.to_string()],date [NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],close [1.21,1.22,1.23],open [1.22,1.21,1.23],high [1.22,1.25,1.24],low [1.19, 1.20,1.21],).unwrap();let lst df[date].as_list().slice(1,1);println!(s :{:?},lst);// 下面all() 可以用col(*)替代let df_filter df.lazy().select([all().exclude([code,date])]).collect().unwrap();println!(df_filter :{},df_filter);}fn windows_over(){println!(------------- windows_over test ---------------- );let df df!(key [a, a, a, a, b, c],value [1, 2, 1, 3, 3, 3]).unwrap();// over()函数col(value).min().over([col(key)]),表示请根据col(key)进行分类再对分类得到的组求最小值操作let df df.clone().lazy().with_column(col(value).min() // .max(), .mean().over([col(key)]).alias(over_min)).with_column(col(value).max().over([col(key)]).alias(over_max));println!({:?}, df.collect().unwrap().head(Some(10))); }//read_csvfn lazy_read_csv(){println!(------------- lazy_read_csv test ---------------- );// features lazy and csv // 请根据自己文件情况进行设置let filepath ../my_duckdb/src/test.csv;// CSV数据格式// 600036.XSHG,2079/7/24,3345.9,3357.8,3326.7,3357,33589,69181710.57,1// 600036.XSHG,2079/7/25,3346,3357.9,3326.8,3357.1,33590,69184251.47,1let polars_lazy_csv_time Instant::now();let p LazyCsvReader::new(filepath).with_try_parse_dates(true) //需要增加Available on crate feature temporal only..with_has_header(true).finish().unwrap();let df p.collect().expect(error to dataframe!);println!(polars lazy 读出csv的行和列数{:?},df.shape());println!(polars lazy 读csv 花时 {:?} 秒, polars_lazy_csv_time.elapsed().as_secs_f32()); }fn read_csv(){println!(------------- read_csv test ---------------- );// features 
polars-iouse std::fs::File;let csv_time Instant::now();let filepath ../my_duckdb/src/test.csv;// CSV数据格式// 600036.XSHG,2079/7/24,3345.9,3357.8,3326.7,3357,33589,69181710.57,1// 600036.XSHG,2079/7/25,3346,3357.9,3326.8,3357.1,33590,69184251.47,1let file File::open(filepath).expect(could not read file);let df CsvReader::new(file).finish().unwrap();//println!(df:{:?},df);println!(读出csv的行和列数{:?},df.shape());println!(读csv 花时 {:?} 秒,csv_time.elapsed().as_secs_f32()); }fn read_csv2(){println!(------------- read_csv2 test ---------------- );// features polars-io// 具体按自己目录路径下的文件let filepath ../my_duckdb/src/test.csv; //请根据自已文件情况进行设置// CSV数据格式// 600036.XSHG,2079/7/24,3345.9,3357.8,3326.7,3357,33589,69181710.57,1// 600036.XSHG,2079/7/25,3346,3357.9,3326.8,3357.1,33590,69184251.47,1let df CsvReadOptions::default().with_has_header(true).try_into_reader_with_file_path(Some(filepath.into())).unwrap().finish().unwrap();println!(read_csv2 df {:?},df) }fn parse_date_csv(){println!(------------- parse_date_csv test ---------------- );// features polars-iolet filepath ../my_duckdb/src/test.csv;// 读出csv并对csv中date类型进行转换// CSV数据格式// 600036.XSHG,2019/7/24,3345.9,3357.8,3326.7,3357,33589,69181710.57,1// 600036.XSHG,2019/7/25,3346,3357.9,3326.8,3357.1,33590,69184251.47,1let df CsvReadOptions::default().map_parse_options(|parse_options| parse_options.with_try_parse_dates(true)).try_into_reader_with_file_path(Some(filepath.into())).unwrap().finish().unwrap();println!({}, df); }fn write_csv_df(){println!(----------- write_csv_df test -------------------------);// toml features csv// features polars-iolet mut df df!(code [600036.SH.to_string(),600036.SH.to_string(),600036.SH.to_string()],date [NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],close [1.21,1.22,1.23],open [1.22,1.21,1.23],high [1.22,1.25,1.24],low [1.19, 1.20,1.21],).unwrap();let mut file 
std::fs::File::create(600036SH.csv).unwrap();CsvWriter::new(mut file).finish(mut df).unwrap(); }fn iter_dataframe_as_row() {println!(------------- iter_dataframe_as_row test ---------------- );let starttime Instant::now();let df: DataFrame df!(D1 [1, 3, 1, 5, 6],D2 [3, 2, 3, 5, 3]).unwrap();let (_row,_col) df.shape();for i in 0.._row{let mut rows Vec::new();for j in 0.._col{let value df[j].get(i).unwrap();rows.push(value);}}println!(dataframe按行遍历cost time :{:?} seconds!,starttime.elapsed().as_secs_f32()); }fn join_concat(){println!(------------- join_concat test ---------------- );// 创建表结构内部有空数据let df df! [// 表头对应数据Model [iPhone XS, iPhone 12, iPhone 13, iPhone 14, Samsung S11, Samsung S12, Mi A1, Mi A2],Company [Apple, Apple, Apple, Apple, Samsung, Samsung, Xiao Mi, Xiao Mi],Sales [80, 170, 130, 205, 400, 30, 14, 8],Comment [None, None, Some(Sold Out), Some(New Arrival), None, Some(Sold Out), None, None],].unwrap();let df_price df! [Model [iPhone XS, iPhone 12, iPhone 13, iPhone 14, Samsung S11, Samsung S12, Mi A1, Mi A2],Price [2430, 3550, 5700, 8750, 2315, 3560, 980, 1420],Discount [Some(0.85), Some(0.85), Some(0.8), None, Some(0.87), None, Some(0.66), Some(0.8)],].unwrap();// 合并// join()接收5个参数分别是要合并的DataFrame左表主键右表主键合并方式let df_join df.join(df_price, [Model], [Model], JoinArgs::from(JoinType::Inner)).unwrap();println!({:?}, df_join);let df_v1 df!(a [1],b [3],).unwrap();let df_v2 df!(a [2],b [4],).unwrap();let df_vertical_concat concat([df_v1.clone().lazy(), df_v2.clone().lazy()],UnionArgs::default(),).unwrap().collect().unwrap();println!({}, df_vertical_concat);}fn get_slice_scalar_from_df(){println!(------------- get_slice_scalar_from_df test ---------------- );let df: DataFrame df!(D1 [1, 2, 3, 4, 5],D2 [3, 2, 3, 5, 3]).unwrap();// slice(1,4): 从第2行开始(包含)各列向下共取4行let slice df.slice(1,4);println!(slice :{:?},slice);// 获取第2列第3个值的标量let scalar df[1].get(3).unwrap(); println!(saclar :{:?},scalar); }fn replace_drop_col(){println!(------------- replace_drop_col test 
---------------- );// toml :features replacelet mut df: DataFrame df!(D1 [1, 2, 3, 4, 5],D2 [3, 2, 3, 5, 3]).unwrap();let new_s1 Series::new(, [2,3,4,5,6]); // 为名字不变// D1列进行替换let df2 df.replace(D1, new_s1).unwrap();// 删除D2列let df3 df2.drop_many([D2]);println!(df3:{:?},df3); }fn drop_null_fill_null(){println!(------------- drop_null_fill_null test ---------------- );let df: DataFrame df!(D1 [None, Some(2), Some(3), Some(4), None],D2 [3, 2, 3, 5, 3]).unwrap();// 取当前列第一个非空的值填充后面的空值let df2 df.fill_null(FillNullStrategy::Forward(None)).unwrap();// Forward(Option)向后遍历用遇到的第一个非空值或给定下标位置的值填充后面的空值// Backward(Option)向前遍历用遇到的第一个非空值或给定下标位置的值填充前面的空值// Mean用算术平均值填充// Min用最小值填充// Max: 用最大值填充// Zero用0填充// One用1填充// MaxBound用数据类型的取值范围的上界填充// MinBound用数据类型的取值范围的下界填充println!(fill_null :{:?}, df2);// 删除D1列中的None值let df3 df2.drop_nulls(Some([D1])).unwrap();println!(drop_nulls :{:?},df3);}fn compute_return(){println!(-----------compute_return test -----------------------);let df df!(code [600036.SH.to_string(),600036.SH.to_string(),600036.SH.to_string()],date [NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],close [1.21,1.22,1.23],open [1.22,1.21,1.23],high [1.22,1.25,1.24],low [1.19, 1.20,1.21],).unwrap();let _df df.clone().lazy().with_columns([(col(close)/col(close).first()-lit(1.0)).alias(ret)]).collect().unwrap();println!(_df :{},_df) }fn standardlize_center(){println!(------------- standardlize_center test ---------------- );let df: DataFrame df!(D1 [1, 2, 3, 4, 5],D2 [3, 2, 3, 5, 3]).unwrap();// 进行标准化对所有的列每个值除以本列最大值// cast(): 由int Float64let standardization df.lazy().select([col(*).cast(DataType::Float64) / col(*).cast(DataType::Float64).max()]);// 对于标准化后的列进行中心化let center standardization.select([col(*) - col(*).mean()]).collect().unwrap();println!(standardlize : {:?},center); }fn create_list_in_df_by_apply(){println!(----------creat_list_in_df_by_apply test ------------------------);let df 
df!(lang [go,rust, go, julia,julia,rust,rust],users [223,1032, 222, 42,1222,3213,4445],year [2020,2021,2022,2023,2024,2025,2026]).unwrap();println!(df :{},df);let out df.clone().lazy().group_by([col(lang)]).agg([col(users) .apply(|s| { let v s.i32().unwrap();let out v.into_iter().map(|v| match v {Some(v_) v_ ,_ 0}).collect::Veci32();Ok(Some(Series::new(_, out)))}, GetOutput::default()).alias(aggr_vec),]) //.with_column(col(aggr_sum).list().alias(aggr_sum_first)) .collect().unwrap();println!({}, out); }fn create_struct_in_df_by_apply(){println!(-----------------create_struct_in_df_by_apply test -------------------------);// TOML features dtype-structuse polars::prelude::*;let df df!(keys [a, a, b],values [10, 7, 1],).unwrap();let out df.clone().lazy().with_column(col(values).apply(|s| {let s s.i32()?;let out_1: VecOptioni32 s.into_iter().map(|v| match v {Some(v_) Some(v_ * 10),_ None,}).collect();let out_2: VecOptioni32 s.into_iter().map(|v| match v {Some(v_) Some(v_ * 20),_ None,}).collect();let out df! (v1 out_1,v2 out_2,).unwrap().into_struct(vals).into_series();Ok(Some(out))},GetOutput::default())) .collect().unwrap();println!({}, out); }fn field_value_counts(){println!(--------------field_value_counts test---------------);let ratings df!(Movie [Cars, IT, ET, Cars, Up, IT, Cars, ET, Up, ET],Theatre [NE, ME, IL, ND, NE, SD, NE, IL, IL, SD],Avg_Rating [4.5, 4.4, 4.6, 4.3, 4.8, 4.7, 4.7, 4.9, 4.7, 4.6],Count [30, 27, 26, 29, 31, 28, 28, 26, 33, 26],).unwrap();println!({}, ratings);let out ratings.clone().lazy().select([col(Theatre).value_counts(true, true, count.to_string(), false)]).collect().unwrap();println!({}, out);} // 宏macro_rules! structs_to_dataframe {($input:expr, [$($field:ident),]) {{// Extract the field values into separate vectors$(let mut $field Vec::new();)*for e in $input.into_iter() {$($field.push(e.$field);)*}df! {$(stringify!($field) $field,)*}}}; }macro_rules! 
dataframe_to_structs_todo {($df:expr, $StructName:ident,[$($field:ident),]) {{// 把df 对应的fields VecStructName,let mut vec:Vec$StructName Vec::new();vec}}; }fn df_to_structs_by_macro_todo(){println!(---------------df_to_structs_by_macro_todo test -------------------);let df df!(date [NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],close [1.21,1.22,1.23],open [1.22,1.21,1.23],high [1.22,1.25,1.24],low [1.19, 1.20,1.21],).unwrap();// 把df VecBarstruct Bar {date:NaiveDate,close:f64,open:f64,high:f64,low:f64,}impl Bar {fn bar(date:NaiveDate, close:f64,open:f64,high:f64,low:f64) - Self{Bar{date,close,open,high,low}}}let bars: VecBar dataframe_to_structs_todo!(df, Bar,[date,close,open,high,low]);println!(df:{:?},df); }fn structs_to_df_by_macro(){println!( ---------------- structs_to_df_by_macro test -----------------------);struct Bar {date:NaiveDate,close:f64,open:f64,high:f64,low:f64,}impl Bar {fn new(date:NaiveDate, close:f64,open:f64,high:f64,low:f64) - Self{Bar{date,close,open,high,low}}}let test_bars:VecBar vec![Bar::new(NaiveDate::from_ymd_opt(2024,1,1).unwrap(),10.1,10.12,10.2,9.99),Bar::new(NaiveDate::from_ymd_opt(2024,1,2).unwrap(),10.2,10.22,10.3,10.1)];let df structs_to_dataframe!(test_bars, [date,close,open,high,low]).unwrap();println!(df:{:?},df); }fn df_to_structs_by_iter(){println!(---------------df_to_structs_by_iter test----------------);// toml :features dtype-structlet now Instant::now();#[derive(Debug, Clone)]struct Bar {code :String,date:NaiveDate,close:f64,open:f64,high:f64,low:f64,}impl Bar {fn new(code:String,date:NaiveDate, close:f64,open:f64,high:f64,low:f64) - Self{Bar{code,date,close,open,high,low}}}let df df!(code [600036.SH.to_string(),600036.SH.to_string(),600036.SH.to_string()],date [NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],close [1.21,1.22,1.23],open 
[1.22,1.21,1.23],high [1.22,1.25,1.24],low [1.19, 1.20,1.21],).unwrap();let mut bars:VecBar Vec::new();let rows_data df.into_struct(bars);let start_date NaiveDate::from_ymd_opt(1970, 1, 2).unwrap();for row_data in rows_data{let code row_data.get(0).unwrap();let mut new_code .to_string();if let AnyValue::String(value) code{new_code value.to_string();}let mut new_date NaiveDate::from_ymd_opt(2000,1,1).unwrap(); let since_days start_date.signed_duration_since(NaiveDate::from_ymd_opt(1,1,1).unwrap());let date row_data.get(1).unwrap();if let AnyValue::Date(dt) date {let tmp_date NaiveDate::from_num_days_from_ce_opt(dt).unwrap();new_date tmp_date.checked_add_signed(since_days).unwrap();}let open row_data[3].extract::f64().unwrap();let high row_data[4].extract::f64().unwrap();let close row_data[2].extract::f64().unwrap();let low row_data[5].extract::f64().unwrap();bars.push(Bar::new(new_code,new_date,close,open,high,low));}println!(df_to_structs2 structchunk : cost time :{:?},now.elapsed().as_secs_f32());println!(bars :{:?},bars); }fn df_to_structs_by_zip(){println!(-----------df_to_structs_by_zip test --------------------);// 同样适用df - struct ,tuplehashmap 等let now Instant::now();#[derive(Debug, Clone)]struct Bar {code :String,date:NaiveDate,close:f64,open:f64,high:f64,low:f64,}impl Bar {fn new(code:String,date:NaiveDate, close:f64,open:f64,high:f64,low:f64) - Self{Bar{code,date,close,open,high,low}}}let df df!(code [600036.SH.to_string(),600036.SH.to_string(),600036.SH.to_string()],date [NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],close [1.21,1.22,1.23],open [1.22,1.21,1.23],high [1.22,1.25,1.24],low [1.19, 1.20,1.21],).unwrap();let bars : VecBar 
df[code].str().unwrap().iter().zip(df[date].date().unwrap().as_date_iter()).zip(df[close].f64().unwrap().iter()).zip(df[open].f64().unwrap().iter()).zip(df[high].f64().unwrap().iter()).zip(df[low].f64().unwrap().iter()).map(|(((((code,date),close),open),high),low)| Bar::new(code.unwrap().to_string(),date.unwrap(),close.unwrap(),open.unwrap(),high.unwrap(),low.unwrap())).collect();println!(df_to_struct3 zip : cost time :{:?} seconds!,now.elapsed().as_secs_f32());println!(bars :{:?},bars);//izip! from itertools --其它参考--省各种复杂的括号//use itertools::izip;//izip!(code, date, close, open,high,low).collect::Vec_() // Vec of 4-tuples}fn df_to_vec_tuples_by_izip(){println!(-------------df_to_tuple_by_izip test---------------);use itertools::izip;// In my real code this is generated from two joined DFs.let df df!(code [600036.sh.to_string(),600036.sh.to_string(),600036.sh.to_string()],date [NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],close [1.21,1.22,1.23],open [1.22,1.21,1.23],high [1.22,1.25,1.24],low [1.19, 1.20,1.21],).unwrap();let mut dates df.column(date).unwrap().date().unwrap().as_date_iter();let mut codes df.column(code).unwrap().str().unwrap().iter();let mut closes df.column(close).unwrap().f64().unwrap().iter();let mut tuples Vec::new();for (date, code, close) in izip!(mut dates, mut codes, mut closes){//println!({:?} {:?} {:?}, date.unwrap(), code.unwrap(), close.unwrap());tuples.push((date.unwrap(),code.unwrap(),close.unwrap()));}// 或这种方式let tuples2 izip!(mut dates, mut codes, mut closes).collect::Vec_();println!(tuples :{:?},tuples);println!(tuples2 :{:?},tuples2); }fn series_to_vec(){println!(------------series_to_vec test-----------------------);let df df!(date [NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],).unwrap();let vec :VecOptionNaiveDate 
df[date].date().unwrap().as_date_iter().collect();println!(vec :{:?},vec) }fn series_to_vec2(){println!(------------series_to_vec2 test----------------------);let df df!(lang [rust,go,julia],).unwrap();let vec:VecOptionstr df[date].str().unwrap().into_iter().map(|s|match s{Some(v_) Some(v_),_ None,}).collect();println!(vec:{:?},vec);}fn structs_in_df(){println!(-----------structs_in_df test -----------------);// feature dtype-structlet df df!(code [600036.SH.to_string(),600036.SH.to_string(),600036.SH.to_string()],date [NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],close [1.21,1.22,1.23],open [1.22,1.21,1.23],high [1.22,1.25,1.24],low [1.19, 1.20,1.21],).unwrap().into_struct(bars).into_series();println!({}, df);// how to get series from struct column?let out df.struct_().unwrap().field_by_name(close).unwrap();println!(out :{},out);// how to get struct value in df let _ df.struct_().unwrap().into_iter().map(|rows| {println!(code :{} date :{} close:{},open:{},high:{},low:{},rows[0],rows[1],rows[2],rows[3],rows[4],rows[5]);}).collect::Vec_();}fn list_in_df(){println!(-------------list_in_df test ------------------------------);let df df!(code [600036.SH.to_string(),600036.SH.to_string(),600036.SH.to_string()],date [NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],close [1.21,1.22,1.23],open [1.22,1.21,1.23],high [1.22,1.25,1.24],low [1.19, 1.20,1.21],).unwrap();let lst df[close].as_list().get(0).unwrap();println!(lst :{:?},lst);}fn serialize_df_to_json(){println!(--------------- serialize_df_to_json test -----------------------);// toml features serdelet df df!(code [600036.SH.to_string(),600036.SH.to_string(),600036.SH.to_string()],date [NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],close 
[1.21,1.22,1.23],open [1.22,1.21,1.23],high [1.22,1.25,1.24],low [1.19, 1.20,1.21],).unwrap();let df_json serde_json::to_value(df).unwrap();println!(df_json {df_json}); }fn serialize_df_to_binary_todo(){println!(---------serialize_df_to_binary_todo test -------------);// toml features serdelet df df!(code [600036.SH.to_string(),600036.SH.to_string(),600036.SH.to_string()],date [NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],close [1.21,1.22,1.23],open [1.22,1.21,1.23],high [1.22,1.25,1.24],low [1.19, 1.20,1.21],).unwrap();// todo//let df_binary serde_json::to_value(df).unwrap();//println!(df_json {df_binary}); }fn df_to_ndarray(){println!(-------------- df_to_ndarray test ------------------------);// toml features ndarraylet df df!(code [600036.SH.to_string(),600036.SH.to_string(),600036.SH.to_string()],date [NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],close [1.21,1.22,1.23],open [1.22,1.21,1.23],high [1.22,1.25,1.24],low [1.19, 1.20,1.21],).unwrap();// ndarray 化先去除非f64列let df_filter df.lazy().select([all().exclude([code,date])]).collect().unwrap();let ndarray df_filter.to_ndarray::Float64Type(IndexOrder::Fortran).unwrap();println!(ndarray :{},ndarray); }fn df_apply(){println!(--------------df_apply--------------------);// df_apply: apply应用于df的一列// 将其中的code列小写改成大写// mut !let mut df df!(code [600036.sh.to_string(),600036.sh.to_string(),600036.sh.to_string()],date [NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],close [1.21,1.22,1.23],open [1.22,1.21,1.23],high [1.22,1.25,1.24],low [1.19, 1.20,1.21],).unwrap();// fn code_to_uppercase(code_val: Series) - Series {code_val.str().unwrap().into_iter().map(|opt_code: Optionstr| {opt_code.map(|code: str| 
code.to_uppercase())}).collect::StringChunked().into_series()}// 对 code列进行str_to_upper操作 ,把本列的小写改成大写,有两种方法// method 1//df.apply(code, code_to_uppercase).unwrap();// method 2df.apply_at_idx(0, code_to_uppercase).unwrap(); // 对第0列即首列进行操作println!(df {},df);}fn write_read_parquet_files(){println!(------------ write_read_parquet_files test -------------------------);// features parquetlet mut df df!(code [600036.sh.to_string(),600036.sh.to_string(),600036.sh.to_string()],date [NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],close [1.21,1.22,1.23],open [1.22,1.21,1.23],high [1.22,1.25,1.24],low [1.19, 1.20,1.21],).unwrap();write_parquet(mut df);let df_ read_parquet(600036SH.parquet);let _df_ scan_parquet(600036SH.parquet).select([all()]).collect().unwrap();assert_eq!(df,df_);assert_eq!(df,_df_);println!(pass write_read parquet test!);fn write_parquet(df : mut DataFrame){let mut file std::fs::File::create(600036SH.parquet).unwrap();ParquetWriter::new(mut file).finish(df).unwrap();}fn read_parquet(filepath:str) -DataFrame{let mut file std::fs::File::open(filepath).unwrap();let df ParquetReader::new(mut file).finish().unwrap();df}fn scan_parquet(filepath:str) -LazyFrame{let args ScanArgsParquet::default();let lf LazyFrame::scan_parquet(filepath, args).unwrap();lf}}fn date_to_str_in_column(){println!(---------------date_t0_str test----------------------);// feature temporallet mut df df!(code [600036.sh.to_string(),600036.sh.to_string(),600036.sh.to_string()],date [NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],close [1.21,1.22,1.23],open [1.22,1.21,1.23],high [1.22,1.25,1.24],low [1.19, 1.20,1.21],).unwrap();// 增加一列把date - date_strlet df df.clone().lazy().with_columns([cols([date]).dt().to_string(%Y-%h-%d).alias(date_str)]).collect().unwrap();println!(df:{},df); }fn 
when_logicial_in_df(){println!(------------------when_condition_in_df test----------------------);let df df!(name [c,julia,go,python,rust,c#,matlab],run-time[1.0,1.11,1.51,3.987,1.01,1.65,2.11]).unwrap();// 当运行速度要在[1.0,1.5]之间为true,其它为falselet df_conditional df.clone().lazy().select([col(run-time),when(col(run-time).lt_eq(1.50).and(col(run-time).gt_eq(1.0))).then(lit(true)).otherwise(lit(false)).alias(speed_conditional),]).collect().unwrap();println!({}, df_conditional); }fn str_to_datetime_date_cast_in_df(){println!(--------------date_cast_in_df test---------------------------);// features strings 否则str()有问题let df df!(custom [Tom,Jack,Rose],login [2024-08-14,2024-08-12,2023-08-09],//首次登陆日期order [2024-08-14 10:15:32,2024-08-14 11:22:32,2024-08-14 14:12:52],//下单时间send [2024-08-15 10:25:38,2024-08-15 14:28:38,2024-08-16 09:07:32],//快递时间).unwrap();let out df.lazy().with_columns([col(login).str().to_date(StrptimeOptions::default()).alias(login_dt)]).with_columns([col(login).str().to_datetime(Some(TimeUnit::Microseconds),None,StrptimeOptions::default(),lit(raise)).alias(login_dtime)]).with_columns([col(order).str().strptime(DataType::Datetime(TimeUnit::Milliseconds, None),StrptimeOptions::default(),lit(raise),).alias(order_dtime),col(send).str().strptime(DataType::Datetime(TimeUnit::Milliseconds, None),StrptimeOptions::default(),lit(raise), // raise an error if the parsing fails).alias(send_dtime),]).with_columns([(col(send_dtime) - col(order_dtime)).alias(duration(seconds)).dt().total_seconds()]).collect().unwrap();println!(out :{},out); }fn unnest_struct_in_df(){// unnest() 将dataframe中struct列执行展开操作// 生成带struct的dataframelet mut df: DataFrame df!(company [ailibaba, baidu],profit [777277778.0, 86555555.9]).unwrap();let series df.clone().into_struct(info).into_series();let mut _df df.insert_column(0, series).unwrap();println!(_df :{},df);// unnest() into_structlet out df.lazy().with_column(col(info).struct_().rename_fields(vec![co..to_string(), pl.to_string()]))// 
将struct所有字段展开.unnest([info]).collect().unwrap();println!(out :{}, out); // _df :shape: (2, 3) // ┌───────────────────────────┬──────────┬──────────────┐ // │ info ┆ company ┆ profit │ // │ --- ┆ --- ┆ --- │ // │ struct[2] ┆ str ┆ f64 │ // ╞═══════════════════════════╪══════════╪══════════════╡ // │ {ailibaba,7.77277778e8} ┆ ailibaba ┆ 7.77277778e8 │ // │ {baidu,8.6556e7} ┆ baidu ┆ 8.6556e7 │ // └───────────────────────────┴──────────┴──────────────┘ // out :shape: (2, 4) // ┌──────────┬──────────────┬──────────┬──────────────┐ // │ co. ┆ pl ┆ company ┆ profit │ // │ --- ┆ --- ┆ --- ┆ --- │ // │ str ┆ f64 ┆ str ┆ f64 │ // ╞══════════╪══════════════╪══════════╪══════════════╡ // │ ailibaba ┆ 7.77277778e8 ┆ ailibaba ┆ 7.77277778e8 │ // │ baidu ┆ 8.6556e7 ┆ baidu ┆ 8.6556e7 │ // └──────────┴──────────────┴──────────┴──────────────┘ }fn as_struct_in_df(){// features lazylet df: DataFrame df!(company [ailibaba, baidu],profit [777277778.0, 86555555.9]).unwrap();// as_struct: 生成相关struct列let _df df.clone().lazy().with_columns([as_struct(vec![col(company),col(profit)]).alias(info)]).collect().unwrap();let df_ df.clone().lazy().with_columns([as_struct(vec![col(*)]).alias(info)]).collect().unwrap();assert_eq!(_df,df_);println!(df :{},_df);// df :shape: (2, 3)// ┌──────────┬──────────────┬───────────────────────────┐// │ company ┆ profit ┆ info │// │ --- ┆ --- ┆ --- │// │ str ┆ f64 ┆ struct[2] │// ╞══════════╪══════════════╪═══════════════════════════╡// │ ailibaba ┆ 7.77277778e8 ┆ {ailibaba,7.77277778e8} │// │ baidu ┆ 8.6556e7 ┆ {baidu,8.6556e7} │// └──────────┴──────────────┴───────────────────────────┘}fn struct_apply_in_df(){let df df!(lang [julia, go, rust,c,c],ratings [AAAA, AAA, AAAAA,AAAA,AAA],users [201,303,278,99,87],references[5,6,9,4,1] ).unwrap();// 需求生成一列struct {lang,ratings,users},并应用apply对struct进行操作,具体见表let out df.lazy().with_columns([// 得到 struct 列as_struct(vec![col(lang), col(ratings),col(users)])// 应用 apply.apply(|s| {// 从series得到structlet ss 
s.struct_().unwrap();// 拆出 Serieslet s_lang ss.field_by_name(lang).unwrap();let s_ratings ss.field_by_name(ratings).unwrap();let s_users ss.field_by_name(users).unwrap();// downcast the Series to their known typelet _s_lang s_lang.str().unwrap();let _s_ratings s_ratings.str().unwrap();let _s_users s_users.i32().unwrap();// zip serieslet out: StringChunked _s_lang.into_iter().zip(_s_ratings).zip(_s_users).map(|((opt_lang, opt_rating),opt_user)| match (opt_lang, opt_rating,opt_user) {(Some(la), Some(ra),Some(us)) Some(format!({}-{}-{},la,ra,us)),_ None,}).collect();Ok(Some(out.into_series()))},GetOutput::from_type(DataType::String),).alias(links-three),]).collect().unwrap();println!({}, out);// shape: (5, 5) // ┌───────┬─────────┬───────┬────────────┬────────────────┐ // │ lang ┆ ratings ┆ users ┆ references ┆ links-three │ // │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ // │ str ┆ str ┆ i32 ┆ i32 ┆ str │ // ╞═══════╪═════════╪═══════╪════════════╪════════════════╡ // │ julia ┆ AAAA ┆ 201 ┆ 5 ┆ julia-AAAA-201 │ // │ go ┆ AAA ┆ 303 ┆ 6 ┆ go-AAA-303 │ // │ rust ┆ AAAAA ┆ 278 ┆ 9 ┆ rust-AAAAA-278 │ // │ c ┆ AAAA ┆ 99 ┆ 4 ┆ c-AAAA-99 │ // │ c ┆ AAA ┆ 87 ┆ 1 ┆ c-AAA-87 │ // └───────┴─────────┴───────┴────────────┴────────────────┘}

查看全文

http://www.w-s-a.com/news/119283/