Skip to content

Commit 5d75027

Browse files
authored
Add file
1 parent 0037881 commit 5d75027

File tree

1 file changed

+292
-0
lines changed

1 file changed

+292
-0
lines changed

Recommender system.ipynb

Lines changed: 292 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,292 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 2,
6+
"metadata": {},
7+
"outputs": [
8+
{
9+
"name": "stdout",
10+
"output_type": "stream",
11+
"text": [
12+
"root\n",
13+
" |-- movieId: integer (nullable = true)\n",
14+
" |-- rating: double (nullable = true)\n",
15+
" |-- userId: integer (nullable = true)\n",
16+
"\n"
17+
]
18+
}
19+
],
20+
"source": [
21+
"from pyspark.sql import SparkSession\n",
22+
"from pyspark.ml.recommendation import ALS\n",
23+
"from pyspark.ml.evaluation import RegressionEvaluator\n",
24+
"\n",
25+
"spark = SparkSession.builder.appName('recommender').getOrCreate()\n",
26+
"df = spark.read.csv('movielens_ratings.csv', inferSchema= True, header = True)\n",
27+
"df.printSchema()"
28+
]
29+
},
30+
{
31+
"cell_type": "code",
32+
"execution_count": 3,
33+
"metadata": {},
34+
"outputs": [
35+
{
36+
"name": "stdout",
37+
"output_type": "stream",
38+
"text": [
39+
"+-------+------+------+\n",
40+
"|movieId|rating|userId|\n",
41+
"+-------+------+------+\n",
42+
"| 2| 3.0| 0|\n",
43+
"| 3| 1.0| 0|\n",
44+
"| 5| 2.0| 0|\n",
45+
"+-------+------+------+\n",
46+
"only showing top 3 rows\n",
47+
"\n"
48+
]
49+
}
50+
],
51+
"source": [
52+
"# Peek at the first three rating rows.\n",
"df.show(n=3)"
53+
]
54+
},
55+
{
56+
"cell_type": "code",
57+
"execution_count": 4,
58+
"metadata": {},
59+
"outputs": [
60+
{
61+
"name": "stdout",
62+
"output_type": "stream",
63+
"text": [
64+
"+-------+------------------+------------------+------------------+\n",
65+
"|summary| movieId| rating| userId|\n",
66+
"+-------+------------------+------------------+------------------+\n",
67+
"| count| 1501| 1501| 1501|\n",
68+
"| mean| 49.40572951365756|1.7741505662891406|14.383744170552964|\n",
69+
"| stddev|28.937034065088994| 1.187276166124803| 8.591040424293272|\n",
70+
"| min| 0| 1.0| 0|\n",
71+
"| max| 99| 5.0| 29|\n",
72+
"+-------+------------------+------------------+------------------+\n",
73+
"\n"
74+
]
75+
}
76+
],
77+
"source": [
78+
"# Summary statistics (count, mean, stddev, min, max) for every column.\n",
"summary_stats = df.describe()\n",
"summary_stats.show()"
79+
]
80+
},
81+
{
82+
"cell_type": "code",
83+
"execution_count": 6,
84+
"metadata": {},
85+
"outputs": [],
86+
"source": [
87+
"# 80/20 train/test split. The seed is fixed because randomSplit otherwise draws a new random\n",
"# partition on every run, which makes the model, the RMSE and the per-user tables below irreproducible.\n",
"train, test = df.randomSplit([0.8, 0.2], seed=42)"
88+
]
89+
},
90+
{
91+
"cell_type": "code",
92+
"execution_count": 7,
93+
"metadata": {},
94+
"outputs": [],
95+
"source": [
96+
"# ALS estimator over (userId, movieId, rating).\n",
"# coldStartStrategy='drop' (Spark >= 2.2) removes rows whose user or movie did not appear in the\n",
"# training split; with the default ('nan') such rows get a NaN prediction, and an RMSE computed\n",
"# over the predictions then becomes NaN as well.\n",
"als = ALS(maxIter=5, regParam=0.01, userCol='userId', itemCol='movieId', ratingCol='rating',\n",
"          coldStartStrategy='drop')"
97+
]
98+
},
99+
{
100+
"cell_type": "code",
101+
"execution_count": 8,
102+
"metadata": {},
103+
"outputs": [
104+
{
105+
"name": "stdout",
106+
"output_type": "stream",
107+
"text": [
108+
"+-------+------+------+-----------+\n",
109+
"|movieId|rating|userId| prediction|\n",
110+
"+-------+------+------+-----------+\n",
111+
"| 31| 1.0| 26| -2.5238004|\n",
112+
"| 31| 1.0| 27|-0.59501255|\n",
113+
"| 31| 1.0| 4| 3.137197|\n",
114+
"| 85| 1.0| 28| -0.1683234|\n",
115+
"| 85| 1.0| 13| 2.2037606|\n",
116+
"| 85| 5.0| 8| 4.343044|\n",
117+
"| 85| 1.0| 29| 1.5260103|\n",
118+
"| 65| 1.0| 28| 3.4493313|\n",
119+
"| 53| 3.0| 13| 2.631197|\n",
120+
"| 53| 1.0| 25| -2.3101962|\n",
121+
"| 78| 1.0| 13| 0.54879403|\n",
122+
"| 78| 1.0| 11| 0.4418241|\n",
123+
"| 81| 5.0| 28| 0.8307642|\n",
124+
"| 81| 1.0| 1| -1.0092545|\n",
125+
"| 81| 1.0| 6| 2.4090357|\n",
126+
"| 81| 1.0| 19| 0.13363218|\n",
127+
"| 81| 1.0| 15| 0.5015665|\n",
128+
"| 28| 1.0| 23| -0.2624761|\n",
129+
"| 28| 1.0| 2| 1.4344041|\n",
130+
"| 76| 1.0| 1| 1.9119977|\n",
131+
"+-------+------+------+-----------+\n",
132+
"only showing top 20 rows\n",
133+
"\n"
134+
]
135+
}
136+
],
137+
"source": [
138+
"model = als.fit(train)\n",
139+
"predictions = model.transform(test)\n",
140+
"predictions.show()"
141+
]
142+
},
143+
{
144+
"cell_type": "code",
145+
"execution_count": 9,
146+
"metadata": {},
147+
"outputs": [
148+
{
149+
"name": "stdout",
150+
"output_type": "stream",
151+
"text": [
152+
"RMSE: 1.8124486699552562\n"
153+
]
154+
}
155+
],
156+
"source": [
157+
"evaluator = RegressionEvaluator(metricName = 'rmse', labelCol = 'rating', predictionCol = 'prediction')\n",
158+
"rmse = evaluator.evaluate(predictions)\n",
159+
"print('RMSE:', rmse)"
160+
]
161+
},
162+
{
163+
"cell_type": "code",
164+
"execution_count": 14,
165+
"metadata": {},
166+
"outputs": [
167+
{
168+
"name": "stdout",
169+
"output_type": "stream",
170+
"text": [
171+
"+------+-------+\n",
172+
"|userId|movieId|\n",
173+
"+------+-------+\n",
174+
"| 12| 4|\n",
175+
"| 12| 18|\n",
176+
"| 12| 22|\n",
177+
"| 12| 35|\n",
178+
"| 12| 38|\n",
179+
"| 12| 41|\n",
180+
"| 12| 45|\n",
181+
"| 12| 63|\n",
182+
"| 12| 79|\n",
183+
"| 12| 83|\n",
184+
"| 12| 95|\n",
185+
"| 12| 96|\n",
186+
"+------+-------+\n",
187+
"\n"
188+
]
189+
}
190+
],
191+
"source": [
192+
"this_user = test.filter(test['userId'] == 12).select('userId', 'movieId')\n",
193+
"this_user.show()"
194+
]
195+
},
196+
{
197+
"cell_type": "code",
198+
"execution_count": 15,
199+
"metadata": {},
200+
"outputs": [
201+
{
202+
"name": "stdout",
203+
"output_type": "stream",
204+
"text": [
205+
"+------+-------+----------+\n",
206+
"|userId|movieId|prediction|\n",
207+
"+------+-------+----------+\n",
208+
"| 12| 22| 1.6517887|\n",
209+
"| 12| 96| 0.1308065|\n",
210+
"| 12| 41| 1.4067035|\n",
211+
"| 12| 35| 0.7640405|\n",
212+
"| 12| 4|-1.1053085|\n",
213+
"| 12| 63| 3.851338|\n",
214+
"| 12| 45|0.70455414|\n",
215+
"| 12| 38| 2.8361285|\n",
216+
"| 12| 95| 0.9426958|\n",
217+
"| 12| 83| 0.6145076|\n",
218+
"| 12| 79| 1.3491223|\n",
219+
"| 12| 18| -0.656619|\n",
220+
"+------+-------+----------+\n",
221+
"\n"
222+
]
223+
}
224+
],
225+
"source": [
226+
"recommendation_this_user = model.transform(this_user)\n",
227+
"recommendation_this_user.show()"
228+
]
229+
},
230+
{
231+
"cell_type": "code",
232+
"execution_count": 17,
233+
"metadata": {},
234+
"outputs": [
235+
{
236+
"name": "stdout",
237+
"output_type": "stream",
238+
"text": [
239+
"+------+-------+----------+\n",
240+
"|userId|movieId|prediction|\n",
241+
"+------+-------+----------+\n",
242+
"| 12| 63| 3.851338|\n",
243+
"| 12| 38| 2.8361285|\n",
244+
"| 12| 22| 1.6517887|\n",
245+
"| 12| 41| 1.4067035|\n",
246+
"| 12| 79| 1.3491223|\n",
247+
"| 12| 95| 0.9426958|\n",
248+
"| 12| 35| 0.7640405|\n",
249+
"| 12| 45|0.70455414|\n",
250+
"| 12| 83| 0.6145076|\n",
251+
"| 12| 96| 0.1308065|\n",
252+
"| 12| 18| -0.656619|\n",
253+
"| 12| 4|-1.1053085|\n",
254+
"+------+-------+----------+\n",
255+
"\n"
256+
]
257+
}
258+
],
259+
"source": [
260+
"# Rank the user's movies from highest to lowest predicted rating.\n",
"recommendation_this_user.sort(recommendation_this_user['prediction'].desc()).show()"
261+
]
262+
},
263+
{
264+
"cell_type": "code",
265+
"execution_count": null,
266+
"metadata": {},
267+
"outputs": [],
268+
"source": []
269+
}
270+
],
271+
"metadata": {
272+
"kernelspec": {
273+
"display_name": "conda_python3",
274+
"language": "python",
275+
"name": "conda_python3"
276+
},
277+
"language_info": {
278+
"codemirror_mode": {
279+
"name": "ipython",
280+
"version": 3
281+
},
282+
"file_extension": ".py",
283+
"mimetype": "text/x-python",
284+
"name": "python",
285+
"nbconvert_exporter": "python",
286+
"pygments_lexer": "ipython3",
287+
"version": "3.6.4"
288+
}
289+
},
290+
"nbformat": 4,
291+
"nbformat_minor": 2
292+
}

0 commit comments

Comments
 (0)