-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathBinaryClassifier.py
More file actions
109 lines (95 loc) · 4.42 KB
/
BinaryClassifier.py
File metadata and controls
109 lines (95 loc) · 4.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# Using Logistic Regression for binary classification of houses in categories like Standard (0) & Premium (1)
# based on input features e.g. sqft area, bedrooms, age in years and price
import os

import matplotlib.pyplot as plt
import numpy as np

from LogisticRegressor import LogisticRegressor
class BinaryClassifier:
    """Label houses as Standard (0) or Premium (1) with logistic regression.

    Constructing an instance runs the complete demo pipeline: load the
    labelled CSV, plot it, train a ``LogisticRegressor``, validate against
    the training data and predict on a small hard-coded sample. Every stage
    prints and plots its result; validation and prediction results are also
    written to CSV files.
    """

    ########### init method runs the steps of training & prediction ###########
    def __init__(self, epoch=100, alpha=0.3, reg=1):
        """Run the load -> train -> validate -> predict pipeline.

        Args:
            epoch: forwarded to ``LogisticRegressor`` as ``numOfIterations``.
            alpha: forwarded to ``LogisticRegressor`` as ``learningRate``.
            reg: forwarded to ``LogisticRegressor`` as ``regularizer``.
        """
        xLabels = ['Area(sqft)', 'Bedrooms', 'Age(years)', 'Prices']
        yLabel = 'Categories (y)'
        classLabels = ['Standard', 'Premium']

        # LOAD the feature columns into X and the category column into y
        X, y = self.readInput("input/area_rooms_age_categories.csv")
        self.plot(X, y, xLabels, yLabel, classLabels)

        classifier = LogisticRegressor(
            numOfIterations=epoch,
            learningRate=alpha,
            regularizer=reg,
            scalingNeeded=True,
            biasNeeded=True,
            verbose=True,
        )

        # TRAIN the model. The value returned by train() (theta) is not kept:
        # every later step goes through the classifier object itself.
        print('\nTRAINING:\n')
        classifier.train(X, y)
        # Save and immediately reload the model.
        # NOTE(review): presumably so the later steps exercise the persisted
        # model -- confirm against LogisticRegressor.
        classifier.saveModel('model/bin_classification.model')
        classifier.loadModel('model/bin_classification.model')

        # VALIDATE model with training data
        print('\nVALIDATION:\n')
        yPred = classifier.validate(X, y)
        self.printData(X, yPred, xLabels, yLabel)
        self.plot(X, yPred, xLabels, yLabel, classLabels)
        self.writeOutput(X, yPred, 'output/house_categories_validation.csv')

        # PREDICT with trained model using sample data
        print('\nPREDICTION:\n')
        X = self.sampleData4Prediction()
        yPred = classifier.predict(X)
        self.printData(X, yPred, xLabels, yLabel)
        self.plot(X, yPred, xLabels, yLabel, classLabels)
        self.writeOutput(X, yPred, 'output/house_categories_prediction.csv')

    @staticmethod
    def _ensureParentDir(fileName):
        """Create the directory that will hold ``fileName`` if it is missing."""
        # Anything that is not a path (open file object, file descriptor) is
        # passed through untouched.
        if not isinstance(fileName, (str, os.PathLike)):
            return
        parent = os.path.dirname(fileName)
        if parent:
            os.makedirs(parent, exist_ok=True)

    @staticmethod
    def _asColumn(y):
        """Return ``y`` as an array, turning a 1-D vector into one column."""
        y = np.asarray(y)
        return y.reshape(-1, 1) if y.ndim == 1 else y

    # readInput method loads all columns from left to right (except the last) in X and the last column in y
    def readInput(self, fileName, delim=','):
        """Load a delimited numeric file and split it into features and target.

        Args:
            fileName: file to read with ``np.genfromtxt``.
            delim: column delimiter.

        Returns:
            ``(X, y)`` where ``X`` holds every column except the last and
            ``y`` holds the last column, kept two-dimensional.

        Note:
            The parsed data must be two-dimensional; the column count is read
            from ``data.shape[1]``.
        """
        data = np.genfromtxt(fileName, delimiter=delim)
        n = data.shape[1]
        X = data[:, 0:n-1]
        y = data[:, -1:]
        return X, y

    def writeOutput(self, X, y, fileName, delim=','):
        """Write ``X`` with ``y`` appended as the last column to ``fileName``.

        Args:
            X: 2-D feature array.
            y: targets, either one column (2-D) or a 1-D vector.
            fileName: destination; a missing parent directory is created.
            delim: column delimiter.
        """
        data = np.hstack([X, self._asColumn(y)])
        self._ensureParentDir(fileName)
        # '%.d' is an integer format: fractional parts are not written.
        np.savetxt(fileName, data, fmt='%.d', delimiter=delim)
        return

    # Plotting dataset
    def plot(self, X, y, xLabels, yLabel, classLabels):
        """Show one scatter subplot per feature column, coloured by class.

        Rows whose label is 0 are drawn in blue and rows whose label is 1 in
        red; the horizontal axis is the 1-based position of a house within its
        own class. Nothing is drawn when the number of columns in ``X``
        differs from ``len(xLabels)``.

        Args:
            X: 2-D feature array.
            y: class labels (0 or 1); flattened before use.
            xLabels: one axis label per column of ``X``.
            yLabel: not used by this method.
            classLabels: legend names for class 0 and class 1, in that order.
        """
        _, cols = X.shape
        # Validate before touching pyplot so a mismatch does not leave an
        # empty figure behind.
        if cols != len(xLabels):
            return
        y = y.ravel()
        plt.figure(figsize=(15, 4), dpi=100)
        for c in range(cols):
            plt.subplot(1, cols, c + 1)
            Xy0 = X[y == 0][:, c]
            Xy1 = X[y == 1][:, c]
            plt.scatter(range(1, Xy0.shape[0] + 1), Xy0, color='b', label=classLabels[0])
            plt.scatter(range(1, Xy1.shape[0] + 1), Xy1, color='r', label=classLabels[1])
            plt.xlabel('House #')
            plt.ylabel(xLabels[c])
            plt.legend()
        plt.show()

    # Print house prices with specific number of columns
    def printData(self, X, y, xLabels, yLabel, delim='\t', fileName=None):
        """Print, or write to a file, a delimited table of ``X`` and ``y``.

        The first line holds ``xLabels`` followed by ``yLabel``; each further
        line holds one row of ``X`` followed by the first column of ``y``.
        Nothing is produced when ``X`` and ``y`` have different row counts.

        Args:
            X: 2-D feature array.
            y: targets, either one column (2-D) or a 1-D vector.
            xLabels: header names for the feature columns.
            yLabel: header name for the target column.
            delim: separator placed between fields.
            fileName: when given, the table is written there instead of being
                printed; a missing parent directory is created.
        """
        rows, _ = X.shape
        y = self._asColumn(y)
        if rows != y.shape[0]:
            return
        headLine = delim.join([*xLabels, yLabel]) + '\n'
        bodyLine = ''.join(
            delim.join([*map(str, X[r]), str(y[r, 0])]) + '\n'
            for r in range(rows)
        )
        if fileName is None:
            print(headLine)
            print(bodyLine)
        else:
            self._ensureParentDir(fileName)
            with open(fileName, "w") as f:
                f.write(headLine)
                f.write(bodyLine)

    # Sample data for Prediction
    def sampleData4Prediction(self):
        """Return five hard-coded sample houses as a ``(5, 4)`` float array.

        Columns are area, bedrooms, age in years and price, each decreasing
        by a fixed step from the first row to the last.
        """
        step = np.arange(5.0)
        areas = 1950.0 - 100.0 * step
        bedrooms = 6.0 - step
        years = 7.0 - step
        prices = 280000.0 - 10000.0 * step
        return np.column_stack([areas, bedrooms, years, prices])
# Entry point: constructing the classifier runs the whole
# load/train/validate/predict pipeline. There is no __main__ guard, so this
# also runs when the module is imported.
BinaryClassifier()