I. Contents
Code walkthrough
II. Implementation
1. Code walkthrough: Trainer implementation. transformers integrates DeepSpeed through the Trainer, so a DeepSpeed configuration file is all that is needed to enable DeepSpeed training. The fine-tuning code is organized as: parameter definition -> data processing -> model creation / evaluation -> training with the Trainer framework. Note: V100 GPUs do not support bf16 (bfloat16) precision training.
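To see how little wiring the integration needs, here is a minimal sketch (an illustration distilled from the full script below, not additional functionality): the Trainer enables DeepSpeed as soon as the `deepspeed` argument of `TrainingArguments` points at a JSON config file; the ZeRO stage, offloading, and the values marked "auto" are then read from that file when training starts.

from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments

# Pointing the arguments at a DeepSpeed JSON config is the only extra step;
# the Trainer initializes the DeepSpeed engine internally when .train() runs.
args = Seq2SeqTrainingArguments(
    output_dir="checkpoints",
    per_device_train_batch_size=5,
    deepspeed="ds_config.json",   # DeepSpeed config shown at the end of this section
)
# trainer = Seq2SeqTrainer(model=model, args=args, ...)  # model and datasets are built below

The full script below fills in the model, datasets, metrics, and the remaining arguments.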
import deepspeed
deepspeed.ops.op_builder.CPUAdamBuilder().load()  # pre-build/load the CPUAdam op used when the optimizer is offloaded to CPU
import nltk
import torch
import evaluate
import datasets
import numpy as np
from nltk.tokenize import sent_tokenize
from torch.nn.utils.rnn import pad_sequence
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments
nltk.download("punkt")
import gc
import torch

###################################### Define parameters ####################################
dataset_name = "samsum"                 # dataset name
# model_name = "google/flan-t5-xxl"     # model name
model_name = "google/flan-t5-xl"        # model name
max_input_length = 256
max_gen_length = 128
output_dir = "checkpoints"
num_train_epochs = 5
learning_rate = 5e-5
deepspeed_config = "ds_config.json"     # DeepSpeed config file
per_device_train_batch_size = 5         # keep the batch size small; too large a value causes OOM
per_device_eval_batch_size = 5
gradient_accumulation_steps = 10        # the per-GPU batch size is small, so use gradient accumulation to enlarge the effective batch size

################################# Load the dataset and preprocess the data #########################################
tokenizer = AutoTokenizer.from_pretrained(model_name)
dataset = datasets.load_dataset(dataset_name)
print(dataset["train"][0])

def preprocess(examples):
    dialogues = ["summarize: " + dia for dia in examples["dialogue"]]
    # summaries = [summ for summ in examples["summary"]]
    model_inputs = tokenizer(dialogues, max_length=max_input_length, truncation=True)
    labels = tokenizer(text_target=examples["summary"], max_length=max_gen_length, truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized_dataset = dataset.map(preprocess, batched=True, remove_columns=["dialogue", "summary", "id"])
# print(tokenized_dataset["train"]["input_ids"][0])  # inspect the mapped data

def collate_fn(features):
    batch_input_ids = [torch.LongTensor(feature["input_ids"]) for feature in features]
    batch_attention_mask = [torch.LongTensor(feature["attention_mask"]) for feature in features]
    batch_labels = [torch.LongTensor(feature["labels"]) for feature in features]
    batch_input_ids = pad_sequence(batch_input_ids, batch_first=True, padding_value=tokenizer.pad_token_id)
    batch_attention_mask = pad_sequence(batch_attention_mask, batch_first=True, padding_value=0)
    batch_labels = pad_sequence(batch_labels, batch_first=True, padding_value=-100)
    return {
        "input_ids": batch_input_ids,
        "attention_mask": batch_attention_mask,
        "labels": batch_labels,
    }

############################## Load the model (Seq2SeqLM) and test it ##############################
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Code for a quick sanity check of the data pipeline
# (requires: from torch.utils.data import DataLoader)
# dataloader = DataLoader(tokenized_dataset["test"], shuffle=False, batch_size=4, collate_fn=collate_fn)
# batch = next(iter(dataloader))
# print(batch)
# output = model(**batch)
# print(output)
############################################# Train the model with the Trainer framework ####################################
print("train....")
metric = evaluate.load("rouge")
def compute_metrics(eval_preds):
    preds, labels = eval_preds
    if isinstance(preds, tuple):
        preds = preds[0]
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    decoded_preds = ["\n".join(sent_tokenize(pred.strip())) for pred in decoded_preds]
    decoded_labels = ["\n".join(sent_tokenize(label.strip())) for label in decoded_labels]
    result = metric.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)
    result = {k: round(v * 100, 4) for k, v in result.items()}
    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in preds]
    result["gen_len"] = np.mean(prediction_lens)
    return result

training_args = Seq2SeqTrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    eval_accumulation_steps=1,        # avoid OOM during evaluation
    predict_with_generate=True,
    learning_rate=learning_rate,
    num_train_epochs=num_train_epochs,
    # logging & evaluation strategies
    logging_dir="logs",
    logging_strategy="steps",
    logging_steps=50,                 # log every 50 steps
    evaluation_strategy="steps",
    eval_steps=500,                   # evaluate every 500 steps
    save_steps=500,
    save_total_limit=2,
    load_best_model_at_end=True,
    deepspeed=deepspeed_config,       # path to the DeepSpeed config file
    report_to="all"
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
)
trainer.train()
gc.collect()
torch.cuda.empty_cache()
# Print results on the validation set
# print(trainer.evaluate(tokenized_dataset["validation"]))
# Print results on the test set
# print(trainer.evaluate(tokenized_dataset["test"]))
# Save the best model
trainer.save_model("best.pt")
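# A reload-and-summarize sketch (not part of the original script; "best.pt" and the sample
# index are illustrative). Note that with ZeRO-3 and stage3_gather_16bit_weights_on_model_save
# set to false, the saved directory may hold only sharded ZeRO checkpoints, so they may need
# to be consolidated first (DeepSpeed writes a zero_to_fp32.py helper next to the checkpoint).
# model = AutoModelForSeq2SeqLM.from_pretrained("best.pt")
# inputs = tokenizer("summarize: " + dataset["test"][0]["dialogue"], return_tensors="pt")
# summary_ids = model.generate(**inputs, max_length=max_gen_length)
# print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))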
# export NCCL_IB_DISABLE=1; export NCCL_P2P_DISABLE=1; NCCL_DEBUG=INFO deepspeed --include=localhost:0,1 test1.py

Config file ds_config.json:
{
    "fp16": {
        "enabled": "auto"
    },
    "optimizer": {
        "type": "AdamW",
        "params": {
            "lr": "auto",
            "betas": "auto",
            "eps": "auto",
            "weight_decay": "auto"
        }
    },
    "scheduler": {
        "type": "WarmupLR",
        "params": {
            "warmup_min_lr": "auto",
            "warmup_max_lr": "auto",
            "warmup_num_steps": "auto"
        }
    },
    "zero_optimization": {
        "stage": 3,
        "offload_optimizer": {
            "device": "cpu",
            "pin_memory": true
        },
        "offload_param": {
            "device": "cpu",
            "pin_memory": true
        },
        "overlap_comm": true,
        "contiguous_gradients": true,
        "sub_group_size": 1e9,
        "reduce_bucket_size": "auto",
        "stage3_prefetch_bucket_size": "auto",
        "stage3_param_persistence_threshold": "auto",
        "stage3_max_live_parameters": 1e9,
        "stage3_max_reuse_distance": 1e9,
        "stage3_gather_16bit_weights_on_model_save": false
    },
    "gradient_accumulation_steps": "auto",
    "gradient_clipping": "auto",
    "steps_per_print": 2000,
    "train_batch_size": "auto",
    "train_micro_batch_size_per_gpu": "auto",
    "wall_clock_breakdown": false
}

Launch (single machine, multiple GPUs):
export NCCL_IB_DISABLE=1; export NCCL_P2P_DISABLE=1; NCCL_DEBUG=INFO deepspeed --include=localhost:0,1 test1.py > output.log 2>&1

2. Code walkthrough: PEFT (LoRA) fine-tuning, Trainer implementation.
import os
import torch
import random
import datasets
import numpy as np
from typing import Dict
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForSeq2Seq,
    TrainingArguments,
    Trainer
)
from peft import (
    LoraConfig,
    TaskType,
    get_peft_model,
    get_peft_model_state_dict,
)

def set_random_seed(seed):
    if seed is not None and seed > 0:
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.random.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True

set_random_seed(1234)

# 1. Set parameters
# LoRA parameters
LORA_R = 8
LORA_ALPHA = 32
LORA_DROPOUT = 0.1
# Training parameters
EPOCHS = 3
LEARNING_RATE = 5e-5
OUTPUT_DIR = "./checkpoints"
BATCH_SIZE = 4  # 2
GRADIENT_ACCUMULATION_STEPS = 3
# Other parameters
MODEL_PATH = "bigscience/bloomz-7b1-mt"
DATA_PATH = "./data/belle_open_source_1M.train.json"
MAX_LENGTH = 512
PATTERN = "{}\n{}"
DS_CONFIG = "ds_zero2_config.json"
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)  # load the tokenizer
# Load the data
dataset = datasets.load_dataset("json", data_files=DATA_PATH)
# print(dataset["train"][0])

# 2. Tokenization
def tokenize(text: str, add_eos_token=True):
    result = tokenizer(
        text,
        truncation=True,
        max_length=MAX_LENGTH,
        padding=False,
        return_tensors=None)
    # Decide whether to append the eos_token
    if (result["input_ids"][-1] != tokenizer.eos_token_id
            and len(result["input_ids"]) < MAX_LENGTH
            and add_eos_token):
        result["input_ids"].append(tokenizer.eos_token_id)
        result["attention_mask"].append(1)
    result["labels"] = result["input_ids"].copy()
    return result

def preprocess(example: Dict, train_on_inputs: bool = False):
    prompt = example["input"]
    response = example["target"]
    text = PATTERN.format(prompt, response)
    tokenized_inp = tokenize(text)
    # If train_on_inputs is False, replace the prompt tokens in the labels with -100,
    # e.g. a 3-token prompt gives labels [-100, -100, -100, <response tokens>...]
    if not train_on_inputs:
        tokenized_prompt = tokenize(prompt, add_eos_token=False)
        prompt_tokens_len = len(tokenized_prompt["input_ids"])
        tokenized_inp["labels"] = [-100] * prompt_tokens_len + tokenized_inp["labels"][prompt_tokens_len:]
    return tokenized_inp

train_data = dataset["train"].shuffle().map(preprocess, remove_columns=["id", "input", "target"])
print(train_data[0])

# pad_to_multiple_of=8 pads each batch to a length that is a multiple of 8
collate_fn = DataCollatorForSeq2Seq(tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True)
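# A small illustration (not part of the training flow; the toy batch below is made up) of what
# this collator produces: input_ids are padded to a common length that is a multiple of 8, and
# padded label positions are filled with -100 so the loss ignores them.
# sample_batch = [
#     {"input_ids": [1, 2, 3], "attention_mask": [1, 1, 1], "labels": [4, 5]},
#     {"input_ids": [1, 2, 3, 4, 5], "attention_mask": [1, 1, 1, 1, 1], "labels": [6, 7, 8]},
# ]
# padded = collate_fn(sample_batch)
# print(padded["input_ids"].shape)  # e.g. torch.Size([2, 8]): padded up to a multiple of 8
# print(padded["labels"][0])        # trailing positions are -100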
# 3. Load the model
device_map = {"": int(os.environ.get("LOCAL_RANK") or 0)}
# device_map specifies which GPU the model is loaded on; torch_dtype=torch.float16 loads the model in half precision
model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, torch_dtype=torch.float16, device_map=device_map)

# 4. LoRA setup
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=LORA_R,                    # rank of the low-rank decomposition in LoRA
    lora_alpha=LORA_ALPHA,       # scaling hyperparameter for the low-rank matrices (see above)
    lora_dropout=LORA_DROPOUT,   # dropout applied in the LoRA layers
)
# Wrap the model with the LoRA adapters
model = get_peft_model(model, lora_config)
model.config.use_cache = False
# Override state_dict so that saved checkpoints contain only the (small) LoRA weights
old_state_dict = model.state_dict
model.state_dict = (
    lambda self, *_, **__: get_peft_model_state_dict(self, old_state_dict())
).__get__(model, type(model))
# Print the model's trainable parameters
model.print_trainable_parameters()

# 5. Training arguments
args = TrainingArguments(
    output_dir=OUTPUT_DIR,                                    # directory where checkpoints are stored
    per_device_train_batch_size=BATCH_SIZE,                   # per-device batch size
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,  # number of gradient accumulation steps
    warmup_steps=100,
    num_train_epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    fp16=True,                  # mixed-precision training
    logging_steps=50,
    evaluation_strategy="no",   # no evaluation during training
    save_strategy="steps",
    save_steps=2000,            # save a checkpoint every 2000 steps
    save_total_limit=5,         # keep at most 5 checkpoints
    deepspeed=DS_CONFIG         # DeepSpeed configuration
)

# 6. Train the model
trainer = Trainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=None,
    args=args,
    data_collator=collate_fn
)
trainer.train()
model.save_pretrained("best_model")

Config file ds_zero2_config.json:

{
    "train_micro_batch_size_per_gpu": "auto",
    "gradient_accumulation_steps": "auto",
    "steps_per_print": 50,
    "gradient_clipping": 1.0,
    "zero_optimization": {
        "stage": 2,
        "offload_optimizer": {
            "device": "cpu"
        },
        "contiguous_gradients": true,
        "overlap_comm": true
    },
    "zero_allow_untested_optimizer": true,
    "fp16": {
        "enabled": true,
        "loss_scale": 0,
        "loss_scale_window": 1000,
        "hysteresis": 2,
        "min_loss_scale": 1
    },
    "optimizer": {
        "type": "Adam",
        "params": {
            "lr": "auto",
            "betas": "auto",
            "eps": "auto",
            "weight_decay": "auto"
        }
    },
    "activation_checkpointing": {
        "partition_activations": true,
        "contiguous_memory_optimization": true
    },
    "wall_clock_breakdown": false
}
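After training, the `best_model` directory holds only the LoRA adapter weights, not the full 7B model. A possible way to use them for inference (a sketch under the assumption that the adapter was saved with model.save_pretrained("best_model") as above; the prompt text is purely illustrative) is to reload the base model and attach the adapter with PeftModel.from_pretrained:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

MODEL_PATH = "bigscience/bloomz-7b1-mt"
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
base_model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, torch_dtype=torch.float16, device_map="auto")
model = PeftModel.from_pretrained(base_model, "best_model")  # attach the saved LoRA adapter
model.eval()

# Build a prompt in the same "{}\n{}" pattern used for training (instruction, then response).
prompt = "Give a one-sentence introduction to Beijing.\n"
inputs = tokenizer(prompt, return_tensors="pt").to(base_model.device)
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))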