# Python3 decision tree
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 29 10:18:04 2017
@author: markli
"""
from sklearn.feature_extraction import DictVectorizer;
from sklearn import preprocessing;
from sklearn import tree;
from sklearn.externals.six import StringIO;
from sklearn.externals import joblib;
import csv;
import sys;
# Load the training data from a CSV file.
# Each data row becomes one {column name: value} dict in feature_list and
# the value of its last column becomes the matching entry in label_list.
sys.path.append('../')
filepath = 'decisiontree.csv'

feature_list = []
label_list = []

# Open the file in a context manager so the handle is always closed.
# newline='' is the mode the csv module documentation asks for.
with open(filepath, 'r', newline='') as f:
    reader = csv.reader(f)
    header = next(reader)  # read the header row
    print("表头为 %s" % header)
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines; skip them instead of
            # failing on row[-1].
            continue
        label_list.append(row[-1])
        # Columns 1 .. len(row)-2 are the features. Column 0 is skipped
        # (presumably a record id -- verify against the CSV) and the last
        # column is the label collected above.
        feature_list.append(
            {header[i]: row[i] for i in range(1, len(row) - 1)}
        )
print("特征值为 %s" % feature_list)
# Vectorize the feature dicts and convert the result to a dense array.
dv = DictVectorizer()
dummX = dv.fit_transform(feature_list).toarray()
print("特征提取值矩阵为 %s" % str(dummX))

# Binarize the target labels.
lb = preprocessing.LabelBinarizer()
dummY = lb.fit_transform(label_list)
print("目标特征化值为 %s" % str(dummY))
# Fit a decision tree classifier that uses the entropy split criterion
# on the vectorized features and binarized labels.
clf = tree.DecisionTreeClassifier(criterion='entropy')
clf = clf.fit(dummX, dummY)
print("树 %s" % str(clf))
# Save the model.
# Write the fitted tree as Graphviz DOT text. The result of export_graphviz
# is not needed, so it is no longer assigned back over the file handle.
with open('dicisiontreeModel.dot', 'w') as dot_file:
    tree.export_graphviz(
        clf,
        feature_names=dv.get_feature_names(),
        out_file=dot_file,
    )

# Persist the fitted classifier with joblib.
# NOTE: despite the .dot extension this file is a joblib dump, not DOT text.
joblib.dump(clf, 'dicisionTree_entropyModel.dot')
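# Load the saved model and predict. The example below is kept disabled inside
# a string literal; it uses `np`, which the imports above do not provide
# (it would need `import numpy as np`).
'''
x = np.array([0,1,0,0,0,1,0,1,1,0]); # test sample
print(x.reshape((1,10)));
#sys.path.append('F:\\Python\\ML');
#f = open('F:\\Python\\ML\\dicisionTree_entropyModel.dot');
# (stray heading from the original page: "decisiontree.csv file format")
clf = joblib.load('F:\\Python\\ML\\dicisionTree_entropyModel.dot');
y = clf.predict(x.reshape((1,10))); # prediction result
print(y);
'''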