# 您的位置:首页 > 其它
#
# 逻辑斯蒂回归
#
# 2015-01-08 19:48 176 查看
import math

# One list of numeric feature values per record (filled by getDataFromFile).
feature=[]

# Class label per record: 1 for 'won', 0 otherwise (filled by getDataFromFile).
result=[]

# Model parameters; theta[0] is the intercept, theta[1:] pair with the features.
theta=[]

# NOTE(review): this module-level list is never read in the visible code;
# the assignment inside getDataFromFile only creates a local. Candidate for removal.
tempfeature=[]

# Predicted label (1 or 0) per record (filled by getTestResult).
test_result=[]

def getDataFromFile(path='kr-vs-kp.data.txt'):
    """Load the comma-separated data set into ``feature`` and ``result``.

    Every non-blank line is split on commas. All fields except the last are
    converted to their character code with ``ord`` and appended to the
    module-level ``feature`` list as one record; the last field is the class
    label and is appended to the module-level ``result`` list as 1 when it is
    ``'won'`` and 0 otherwise.

    Blank lines are skipped while reading, so ``feature`` and ``result``
    always end up with the same length and no record has to be popped
    afterwards.

    Args:
        path: File to read; defaults to the name the script always used.

    Raises:
        OSError: If the file cannot be opened.
        TypeError: If a feature field is not exactly one character
            (raised by ``ord``).
    """
    with open(path) as fileData:
        for each in fileData:
            # Strip the line terminator (LF or CRLF) up front so the label
            # compares equal to 'won' even on the last line of the file.
            line = each.strip()
            if not line:
                continue
            cur = line.split(',')
            # transform chars to ASCII
            feature.append([ord(eachItem) for eachItem in cur[:-1]])
            # Labels in the file are 'won' and 'nowin'.
            result.append(1 if cur[-1] == 'won' else 0)

def feature_scale(feature_list):
    """Mean-normalise every inner list of ``feature_list`` in place.

    Each inner list is replaced by a new list whose elements are
    ``(value - mean) / (max - min + 1)``, with mean, max and min taken over
    that same inner list. The ``+ 1`` keeps the denominator non-zero when all
    values are equal. Empty inner lists are left untouched.

    NOTE(review): the statistics are computed per inner list. The script
    passes one inner list per record, so each record is normalised against
    itself rather than each feature column across records. Confirm this is
    intended before relying on it as conventional feature scaling.

    Args:
        feature_list: List of lists of numbers; modified in place.

    Returns:
        None.
    """
    for index, values in enumerate(feature_list):
        if not values:
            # Nothing to scale, and the mean would divide by zero.
            continue
        mean = sum(values) / len(values)
        spread = max(values) - min(values) + 1
        feature_list[index] = [(value - mean) / spread for value in values]

def inittheta(count=37):
    """Reset the module-level ``theta`` list to ``count`` parameters of 1.0.

    The list is overwritten in place rather than appended to, so calling this
    more than once always leaves exactly ``count`` parameters.

    Args:
        count: Number of parameters (intercept plus one per feature).
            Defaults to 37, the value the script always used.
    """
    theta[:] = [1.] * count

def hypothesisOfLogisticRegression(theta_list, feature_record):
    """Return the logistic (sigmoid) hypothesis for one record.

    Computes ``z = theta_list[0] + sum(theta_list[k + 1] * feature_record[k])``
    and returns ``1 / (1 + exp(-z))``.

    Args:
        theta_list: Parameters; index 0 is the intercept and index ``k + 1``
            is the weight of feature ``k``.
        feature_record: Feature values of a single record. Values are paired
            with ``theta_list[1:]``; unpaired trailing items on either side
            are ignored.

    Returns:
        A float in the closed interval [0, 1].
    """
    weighted_sum = .0
    for weight, value in zip(theta_list[1:], feature_record):
        weighted_sum += weight * value
    # The intercept is added last, matching the original summation order.
    weighted_sum += theta_list[0]

    # math.exp overflows for arguments above roughly 709, so pick the form of
    # the sigmoid whose exponent is never positive.
    if weighted_sum >= 0:
        return 1 / (1 + math.exp(-weighted_sum))
    exp_z = math.exp(weighted_sum)
    return exp_z / (1 + exp_z)

#print(hypothesisOfLogisticRegression(theta, feature[0]))

#print(sum(feature[0]))

def calculteCostFunction(thetaList, featureList, resultList):
    """Return the mean cross-entropy cost (base-10 logs) over all records.

    For every record the term
    ``y * log10(h) + (1 - y) * log10(1 - h)`` is accumulated, where ``h`` is
    ``hypothesisOfLogisticRegression(thetaList, record)`` and ``y`` is the
    matching label; the negated sum is divided by the number of records.

    The accumulator is initialised once, before the loop, so every record
    contributes to the result.

    Args:
        thetaList: Model parameters passed through to the hypothesis.
        featureList: One feature record per sample.
        resultList: Labels (0 or 1) aligned with ``featureList``; unpaired
            trailing items on either side are ignored.

    Returns:
        The cost as a float, or 0.0 when there are no samples.
    """
    sample_count = min(len(featureList), len(resultList))
    if sample_count == 0:
        return 0.0

    # Keep probabilities strictly inside (0, 1) so log10 never receives 0.
    tiny = 1e-15
    total = .0
    for record, label in zip(featureList, resultList):
        probability = hypothesisOfLogisticRegression(thetaList, record)
        probability = min(max(probability, tiny), 1 - tiny)
        total += (label * math.log10(probability)
                  + (1 - label) * math.log10(1 - probability))
    return -total / sample_count

def SGD(learning_rate=0.01, tolerance=0.0000000001):
    """Fit the module-level ``theta`` by stochastic gradient descent.

    Records of ``feature`` / ``result`` are visited in order. For each record
    the full cost is measured, every parameter is moved against that record's
    gradient, and the cost is measured again. Training stops as soon as one
    update fails to lower the cost by more than ``tolerance``.

    The prediction error is computed once per record, before any parameter is
    changed, so all parameters of one step are updated from the same
    hypothesis value.

    Args:
        learning_rate: Step size; defaults to the original constant 0.01.
        tolerance: Minimum cost decrease required to keep going; defaults to
            the original constant 1e-10.
    """
    if not feature:
        # Nothing to train on; the loop below would never terminate.
        return

    cost_before = 1.
    cost_after = .0
    while cost_before - cost_after > tolerance:
        for record, label in zip(feature, result):
            cost_before = calculteCostFunction(theta, feature, result)
            print('the value of cost function is:', cost_before)

            error = hypothesisOfLogisticRegression(theta, record) - label
            for j, value in zip(range(1, len(theta)), record):
                theta[j] -= learning_rate * error * value
            theta[0] -= learning_rate * error

            cost_after = calculteCostFunction(theta, feature, result)
            print('new cost is ', cost_after)

            # Written as a negated '>' so a NaN cost also ends the pass.
            if not (cost_before - cost_after > tolerance):
                break
    print("Find the optimal theta")

def GD(learning_rate=0.01, tolerance=0.00001):
    """Fit the module-level ``theta`` by batch gradient descent.

    Each iteration measures the cost, computes the prediction error of every
    record with the current parameters, sums the per-parameter gradients over
    all records and then updates all parameters at once. Iteration stops when
    an update fails to lower the cost by more than ``tolerance``.

    Gradients are rebuilt from scratch on every iteration and all of them are
    derived from the same parameter snapshot.

    NOTE(review): the gradient is the plain sum over records (not divided by
    the record count), as in the original code, so ``learning_rate`` acts on
    the summed gradient.

    Args:
        learning_rate: Step size; defaults to the original constant 0.01.
        tolerance: Minimum cost decrease required to keep going; defaults to
            the original constant 1e-5.
    """
    previous_cost = 1.
    current_cost = .0
    while previous_cost - current_cost > tolerance:
        previous_cost = calculteCostFunction(theta, feature, result)
        print('Cost is', previous_cost)

        # One hypothesis evaluation per record, shared by every parameter.
        errors = [
            hypothesisOfLogisticRegression(theta, record) - label
            for record, label in zip(feature, result)
        ]
        gradient = [sum(errors)]  # intercept term
        for j in range(1, len(theta)):
            gradient.append(
                sum(error * record[j - 1]
                    for error, record in zip(errors, feature))
            )

        # Simultaneous update: theta changes only after all gradients exist.
        for j, slope in enumerate(gradient):
            theta[j] -= learning_rate * slope

        current_cost = calculteCostFunction(theta, feature, result)
        print('new cost is ', current_cost)
    print('Find optimal theta ', theta)
    print('cost is', current_cost)

def getTestResult():
    """Fill the module-level ``test_result`` with a prediction per record.

    A record of ``feature`` is predicted as 1 when its hypothesis under the
    current ``theta`` is at least 0.5, and as 0 otherwise. The list is
    overwritten in place, so repeated calls never leave stale predictions
    behind.
    """
    test_result[:] = [
        1 if hypothesisOfLogisticRegression(theta, f1) >= 0.5 else 0
        for f1 in feature
    ]

def TestLRClassifier():
    """Print the share of correct and wrong predictions.

    Compares the module-level ``test_result`` with ``result`` pairwise and
    prints both proportions relative to the number of compared pairs.
    """
    pairs = list(zip(test_result, result))
    total = len(pairs)
    if total == 0:
        # Avoid dividing by zero when nothing has been predicted yet.
        print('no predictions to evaluate')
        return

    correct_count = sum(1 for predicted, actual in pairs if predicted == actual)
    wrong_count = total - correct_count
    print('the correct proportion is ', correct_count / total)
    print('the wrong proportion is ', wrong_count / total)

if __name__ == '__main__':
    # Run the whole pipeline only when executed as a script, so importing
    # this module does not read the data file or start training.
    getDataFromFile()
    inittheta()
    feature_scale(feature)
    # The original also called the hypothesis and cost functions here and
    # discarded the results; those calls had no effect and were dropped.
    GD()
    getTestResult()
    TestLRClassifier()
# 内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
# 标签: