AI2. 鸢尾花分类_2
描述
机器学习库 sklearn 自带鸢尾花分类数据集,分为四个特征和三个类别,其中这三个类别在数据集中分别表示为 0, 1 和 2,请实现 transform_three2two_cate 函数的功能,该函数是一个无参函数,要求将数据集中 label 为 2 的数据进行移除,也就是说仅保留 label 为 0 和 1 的情况,并且对 label 为 0 和 1 的特征数据进行保留,返回值为 numpy.ndarray 格式的训练特征数据和 label 数据,分别命名为 new_feat 和 new_label。
要求输出测试集上的 accuracy_score,同时要求 accuracy_score 要不小于 0.95。
Python 3 解法, 执行用时: 1152ms, 内存消耗: 524288KB, 提交时间: 2022-07-27
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from sklearn.tree import DecisionTreeClassifier


def transform_three2two_cate():
    """Reduce the iris dataset to a two-class problem.

    Loads the iris dataset and drops every sample whose label is 2.

    Returns:
        tuple: ``(new_feat, new_label)`` -- the feature rows and the
        matching labels of the samples labelled 0 or 1, in their
        original order.
    """
    data = datasets.load_iris()
    # Features live under the key `data`, labels under the key `target`.
    # code start here
    # One boolean mask applied to both arrays keeps features and labels
    # aligned row-for-row.
    keep = data.target != 2
    new_feat = data.data[keep]
    new_label = data.target[keep]
    # code end here
    return new_feat, new_label


def train_and_evaluate():
    """Train a decision tree on the two-class data and print test accuracy."""
    data_X, data_Y = transform_three2two_cate()
    # Fixed seed so the split (and therefore the printed score) is
    # reproducible from run to run.
    train_x, test_x, train_y, test_y = train_test_split(
        data_X, data_Y, test_size=0.2, random_state=0
    )
    # code start here
    estimator = DecisionTreeClassifier(random_state=0)
    estimator.fit(train_x, train_y)
    y_predict = estimator.predict(test_x)
    # code end here
    # The predicted labels must be named y_predict (list or numpy.ndarray).
    # accuracy_score is documented as (y_true, y_pred).
    print(accuracy_score(test_y, y_predict))


if __name__ == "__main__":
    train_and_evaluate()
    # The exercise template describes the expected result as:
    #   1. {0,1}, meaning the remaining labels are 0 and 1
    #   2. the test-set accuracy, which must be > 0.95
    # NOTE(review): only the accuracy is printed here; no {0,1} line is
    # emitted by this solution.
Python 3 解法, 执行用时: 1153ms, 内存消耗: 524288KB, 提交时间: 2022-07-05
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from sklearn.tree import DecisionTreeClassifier


def transform_three2two_cate():
    """Return the iris samples whose label is 0 or 1.

    Every sample labelled 2 is removed from both the feature matrix and
    the label vector.

    Returns:
        tuple: ``(new_feat, new_label)`` with features and labels kept in
        the same order.
    """
    data = datasets.load_iris()
    # Features live under the key `data`, labels under the key `target`.
    # code start here
    # Select with a boolean mask instead of handing the tuple returned by
    # np.where to np.delete; the same mask indexes both arrays, so rows
    # and labels cannot drift apart.
    is_binary = data.target != 2
    new_feat = data.data[is_binary]
    new_label = data.target[is_binary]
    # code end here
    return new_feat, new_label


def train_and_evaluate():
    """Fit a depth-limited decision tree and print its test accuracy."""
    data_X, data_Y = transform_three2two_cate()
    # Seeded so repeated runs evaluate on the same held-out samples.
    train_x, test_x, train_y, test_y = train_test_split(
        data_X, data_Y, test_size=0.2, random_state=0
    )
    # code start here
    dtc = DecisionTreeClassifier(max_depth=3, random_state=0)  # build the tree model
    dtc.fit(train_x, train_y)  # train the model
    y_predict = dtc.predict(test_x)
    # code end here
    # The predicted labels must be named y_predict (list or numpy.ndarray).
    # accuracy_score is documented as (y_true, y_pred).
    print(accuracy_score(test_y, y_predict))


if __name__ == "__main__":
    train_and_evaluate()
    # The exercise template describes the expected result as:
    #   1. {0,1}, meaning the remaining labels are 0 and 1
    #   2. the test-set accuracy, which must be > 0.95
    # NOTE(review): only the accuracy is printed here; no {0,1} line is
    # emitted by this solution.
Python 3 解法, 执行用时: 1184ms, 内存消耗: 524288KB, 提交时间: 2022-07-16
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from sklearn.tree import DecisionTreeClassifier


def transform_three2two_cate():
    """Strip class 2 from the iris dataset.

    Returns:
        tuple: ``(new_feat, new_label)`` -- feature rows and labels of
        the samples labelled 0 or 1, order preserved and aligned.
    """
    data = datasets.load_iris()
    # Features live under the key `data`, labels under the key `target`.
    # code start here
    # data.target == 2 is already an array, so no np.array() wrap is
    # needed; filtering with the inverse mask avoids passing np.where's
    # tuple result to np.delete.
    not_class_two = data.target != 2
    new_feat = data.data[not_class_two]
    new_label = data.target[not_class_two]
    # code end here
    return new_feat, new_label


def train_and_evaluate():
    """Fit a liblinear logistic regression and print its test accuracy."""
    data_X, data_Y = transform_three2two_cate()
    # Seeded so the train/test partition is the same on every run.
    train_x, test_x, train_y, test_y = train_test_split(
        data_X, data_Y, test_size=0.2, random_state=0
    )
    # code start here
    classifier = LogisticRegression(solver='liblinear', C=100)
    classifier.fit(train_x, train_y)
    y_predict = classifier.predict(test_x)
    # code end here
    # The predicted labels must be named y_predict (list or numpy.ndarray).
    # accuracy_score is documented as (y_true, y_pred).
    print(accuracy_score(test_y, y_predict))


if __name__ == "__main__":
    train_and_evaluate()
    # The exercise template describes the expected result as:
    #   1. {0,1}, meaning the remaining labels are 0 and 1
    #   2. the test-set accuracy, which must be > 0.95
    # NOTE(review): only the accuracy is printed here; no {0,1} line is
    # emitted by this solution.
Python 3 解法, 执行用时: 1190ms, 内存消耗: 524288KB, 提交时间: 2022-07-18
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from sklearn.tree import DecisionTreeClassifier


def transform_three2two_cate():
    """Build the binary (label 0 vs. label 1) subset of the iris dataset.

    Returns:
        tuple: ``(new_feat, new_label)`` -- the features and labels left
        after removing every sample labelled 2.
    """
    data = datasets.load_iris()
    # Features live under the key `data`, labels under the key `target`.
    # code start here
    # A single mask drives both selections, which guarantees that the
    # i-th label still belongs to the i-th feature row.
    mask = data.target != 2
    new_feat = data.data[mask]
    new_label = data.target[mask]
    # code end here
    return new_feat, new_label


def train_and_evaluate():
    """Train a decision tree on the binary subset and print test accuracy."""
    data_X, data_Y = transform_three2two_cate()
    # Seed both the split and the tree so the printed score is repeatable.
    train_x, test_x, train_y, test_y = train_test_split(
        data_X, data_Y, test_size=0.2, random_state=0
    )
    # code start here
    dtc = DecisionTreeClassifier(random_state=0)
    dtc.fit(train_x, train_y)
    y_predict = dtc.predict(test_x)
    # code end here
    # The predicted labels must be named y_predict (list or numpy.ndarray).
    # accuracy_score is documented as (y_true, y_pred).
    print(accuracy_score(test_y, y_predict))


if __name__ == "__main__":
    train_and_evaluate()
    # The exercise template describes the expected result as:
    #   1. {0,1}, meaning the remaining labels are 0 and 1
    #   2. the test-set accuracy, which must be > 0.95
    # NOTE(review): only the accuracy is printed here; no {0,1} line is
    # emitted by this solution.
Python 3 解法, 执行用时: 1194ms, 内存消耗: 524288KB, 提交时间: 2022-06-24
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from sklearn.tree import DecisionTreeClassifier


def transform_three2two_cate():
    """Return the iris features and labels for classes 0 and 1 only.

    Returns:
        tuple: ``(new_feat, new_label)``; both are produced by indexing
        the loaded arrays, so the labels are returned as an array rather
        than a Python list, as the exercise requires.
    """
    data = datasets.load_iris()
    # The loaded object is dict-like: features are under the key `data`,
    # labels under the key `target`.
    # code start here
    # Filter by label value instead of slicing up to the first 2: this
    # does not depend on the samples being sorted by label, and it keeps
    # new_label as an array instead of converting it to a list.
    keep = data.target != 2
    new_feat = data.data[keep]
    new_label = data.target[keep]
    # code end here
    return new_feat, new_label


def train_and_evaluate():
    """Fit an L2 logistic regression and print its test accuracy."""
    data_X, data_Y = transform_three2two_cate()
    # Seeded so the train/test partition is the same on every run.
    train_x, test_x, train_y, test_y = train_test_split(
        data_X, data_Y, test_size=0.2, random_state=0
    )
    # code start here
    lr = LogisticRegression(
        penalty='l2', solver='liblinear', C=0.5, max_iter=1000
    )
    lr.fit(train_x, train_y)
    y_predict = lr.predict(test_x)
    # code end here
    # The predicted labels must be named y_predict (list or numpy.ndarray).
    # accuracy_score is documented as (y_true, y_pred).
    print(accuracy_score(test_y, y_predict))


if __name__ == "__main__":
    train_and_evaluate()
    # The exercise template describes the expected result as:
    #   1. {0,1}, meaning the remaining labels are 0 and 1
    #   2. the test-set accuracy, which must be > 0.95
    # NOTE(review): only the accuracy is printed here; no {0,1} line is
    # emitted by this solution.