注册网站网,北京建筑公司有哪些,长春火车站电话咨询电话,物业公司网站模板
要求：
针对实验1和实验2构建的数据集信息分析
设计实现通过数据简介进行大类分类的程序
代码实现：
训练集数据获取：
read_data.py
import json
import pickledef read_intro():data []trypathrE:\Procedure\Python\Experiment\f…
要求
针对实验1和实验2构建的数据集信息分析
设计实现通过数据简介进行大类分类的程序
代码实现
训练集数据获取
read_data.py
import json
import pickle


def read_intro(filepath=r"E:\Procedure\Python\Experiment\res1.json"):
    """Load the JSON-lines dataset and keep the records that have an intro.

    Each non-blank line of the file is parsed as one JSON object.

    Args:
        filepath: Path of the UTF-8 encoded JSON-lines file. Defaults to the
            dataset location used by the experiment.

    Returns:
        A list of the parsed records whose ``intro`` field is present and
        non-empty, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data = []
    with open(filepath, "r", encoding="utf-8") as file:
        for line in file:
            # A blank line (e.g. a trailing newline) is not a record and
            # would make json.loads fail.
            if not line.strip():
                continue
            record = json.loads(line)
            # Truthiness also rejects a missing or null intro, which cannot
            # be used as a training text any more than an empty string can.
            if record.get("intro"):
                data.append(record)
    return data


def store_model(
    model,
    path=r"E:\Procedure\Python\Experiment\Machine_Learning\model1.pkl",
):
    """Serialize *model* with pickle, overwriting any existing file.

    Failures are reported on stdout instead of being raised, so a failed
    save does not abort the calling script.

    Args:
        model: The object to pickle.
        path: Destination file. Defaults to the experiment's model file.
    """
    try:
        # "wb" truncates an existing file, so saving again after retraining
        # replaces the previously stored model.
        with open(path, "wb") as file:
            pickle.dump(model, file)
    except Exception as e:  # deliberate best-effort: report and continue
        print("An error occurred:", e)
def store_report(
    report,
    path=r"E:\Procedure\Python\Experiment\Machine_Learning\class_report.txt",
):
    """Write the classification report text to *path*, replacing the file.

    Args:
        report: The report as a string.
        path: Destination text file. Defaults to the experiment's report
            file.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    # An explicit encoding keeps the output identical across platforms even
    # when the report contains non-ASCII class labels.
    with open(path, "w", encoding="utf-8") as file:
        file.write(report)


def get_model(
    m_path=r"E:\Procedure\Python\Experiment\Machine_Learning\model1.pkl",
):
    """Load a previously pickled model.

    Args:
        m_path: Path of the pickle file. Defaults to the experiment's model
            file.

    Returns:
        The unpickled object, or ``None`` when the file cannot be opened or
        unpickled (the error is printed).
    """
    try:
        with open(m_path, "rb") as file:
            # NOTE(security): unpickling executes code embedded in the file;
            # only load model files produced by this project.
            return pickle.load(file)
    except Exception as e:  # deliberate best-effort: None means "no model"
        print(e)
        return None
训练模型
多项式朴素贝叶斯模型用于单一标签文本分类
# 导入所需的库
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import read_data
import random
# Load the records and shuffle them so the split below does not depend on
# the order of the source file.
data = read_data.read_intro()
random.shuffle(data)
X = [item['intro'] for item in data]
y = [item['mainclass'] for item in data]

# Split the raw texts into training and test sets BEFORE vectorizing, so the
# TF-IDF vocabulary and IDF weights are learned from the training texts only
# and nothing about the test set leaks into the features.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Text vectorization: fit on the training texts, reuse the fitted vectorizer
# for the test texts.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)

# Reuse the stored classifier when one can be loaded. get_model() returns
# None when the model file is missing or unreadable (e.g. on the first run),
# so fall back to a fresh multinomial naive Bayes classifier.
model = read_data.get_model()
if model is None:
    model = MultinomialNB()

# Train the model
model.fit(X_train, y_train)

# Predict on the test set and persist the trained model
y_pred = model.predict(X_test)
read_data.store_model(model)

# Evaluate model performance
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

# Print and store the classification report
print('\nClassification Report:')
report = classification_report(y_test, y_pred, zero_division=0)
print(report)
read_data.store_report(report)

# Section heading from the original write-up: "Results" (结果).