-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmixed_inputs.py
119 lines (95 loc) · 3.81 KB
/
mixed_inputs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# -*- coding: utf-8 -*-
"""
QMIND - Google Street View Appraiser
Levi Stringer, Colin Cumming, Jacob Laframboise, Nick Merz
This file uses a convolutional neural network on satellite and street view images,
and a multilayer perceptron on numerical data for houses to estimate their value.
Created on Sun Mar 3 14:34:04 2019
@author: Colin Cumming, Jacob Laframboise
"""
# imports
import locale
import os

import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import InverseTimeDecay

# local folder imports
from pyimagesearch import attributeProcessing
from pyimagesearch import models
# Data locations, all resolved relative to this file rather than to the
# current working directory.
thisModulePath = os.path.abspath(__file__)
parentFolder = os.path.split(thisModulePath)[0]
dataFolder = os.path.join(parentFolder, "data")
# CSV passed to the attribute loader, and the folder passed to the image loader
inputPathHouseData = os.path.join(dataFolder, "FINALDATASET.csv")
houseImagesPath = os.path.join(dataFolder, "Images")
# read the per-house attribute table
print("[INFO] loading numerical house data...")
df = attributeProcessing.get_house_attributes(inputPathHouseData)

# read the images that belong to the rows of df, then divide by 255
print("[INFO] loading house images...")
images = attributeProcessing.load_house_images(df, houseImagesPath)
images = images / 255.0

print("[INFO] processing data...")
# hold out 25% of the samples; attributes and images are split in one call
# so they stay aligned, and the fixed seed makes the split repeatable
split = train_test_split(df, images, random_state=42, test_size=0.25)
trainAttrX, testAttrX = split[0], split[1]
trainImagesX, testImagesX = split[2], split[3]

# targets: price divided by the largest price seen in the training split,
# converted from pandas Series to numpy arrays
maxPrice = trainAttrX["price"].max()
trainY = (trainAttrX["price"] / maxPrice).to_numpy()
testY = (testAttrX["price"] / maxPrice).to_numpy()

# replace the raw attribute frames with the helper's processed versions
(trainAttrX, testAttrX) = attributeProcessing.process_house_attributes(
    df, trainAttrX, testAttrX)
# Create one branch per input type: an MLP for the numerical attributes
# (input width = number of attribute columns) and a CNN for the images.
# NOTE(review): the CNN arguments look like (width, height, depth) and
# regress=False presumably omits each branch's own regression output --
# confirm both against pyimagesearch.models.
mlp = models.create_mlp(trainAttrX.shape[1], regress=False)
cnn = models.create_cnn(32, 64, 3, regress=False)
print(type(mlp))
print(type(cnn))

# Merge the branches: concatenate their outputs and pass them through a
# small dense head ending in a single linear unit (the scaled price).
combinedInput = concatenate([mlp.output, cnn.output])
x = Dense(4, activation="relu")(combinedInput)
x = Dense(1, activation="linear")(x)
model = Model(inputs=[mlp.input, cnn.input], outputs=x)

# Compile the model.
# Adam's `lr` alias and `decay` keyword are deprecated and rejected by newer
# Keras releases, so the same schedule is expressed explicitly:
# InverseTimeDecay with decay_steps=1 yields lr0 / (1 + decay_rate * step),
# which is what `decay` used to apply per optimizer step.
lrSchedule = InverseTimeDecay(
    initial_learning_rate=1e-3, decay_steps=1, decay_rate=1e-3 / 200)
opt = Adam(learning_rate=lrSchedule)
model.compile(optimizer=opt, loss="mean_absolute_percentage_error")
# fit on the training partition, using the held-out partition for validation
print("[INFO] training model...")
history = model.fit(
    x=[trainAttrX, trainImagesX],
    y=trainY,
    validation_data=([testAttrX, testImagesX], testY),
    batch_size=8,
    epochs=30,  # overfitting occurs after 30 epochs
)

# run the trained model on the held-out inputs
print("[INFO] predicting house prices...")
preds = model.predict(x=[testAttrX, testImagesX])
# learning curve: training loss first, then validation loss, per epoch
for curve in ('loss', 'val_loss'):
    plt.plot(history.history[curve])
plt.xlabel('Epoch')
plt.ylabel('Mean Absolute Percentage Error')
plt.title('Learning Curve')
plt.legend(['Train', 'Test'], loc='upper right')
# block until the figure window is closed
plt.show(block=True)
def _format_currency(value):
    """Return *value* formatted as a currency string with digit grouping.

    Uses the active locale when it can format currency; otherwise (e.g. the
    process is still in the "C" locale) falls back to a plain "$1,234.56"
    style so the final report is always printed.
    """
    try:
        return locale.currency(value, grouping=True)
    except ValueError:
        # locale.currency raises ValueError when the locale has no
        # currency formatting rules
        return "${:,.2f}".format(value)


# compute the difference between the *predicted* house prices and the
# *actual* house prices, then the percentage difference and the absolute
# percentage difference (both operands are on the same max-price scale,
# so the percentages need no rescaling)
diff = preds.flatten() - testY
percentDiff = (diff / testY) * 100
absPercentDiff = np.abs(percentDiff)

# mean and standard deviation of the absolute percentage difference
mean = np.mean(absPercentDiff)
std = np.std(absPercentDiff)

# finally, show some statistics on our model
try:
    locale.setlocale(locale.LC_ALL, "en_US.UTF-8")
except locale.Error:
    # the en_US locale is not installed on every host; keep the current
    # locale instead of crashing after training has already finished
    pass
print("[INFO] avg. house price: {}, std house price: {}".format(
    _format_currency(df["price"].mean()),
    _format_currency(df["price"].std())))
print("[INFO] mean: {:.2f}%, std: {:.2f}%".format(mean, std))