From 304eac5e653ce3f415328d4c8575d5ae6dc2bd95 Mon Sep 17 00:00:00 2001 From: Nitish Gupta Date: Thu, 25 Dec 2014 23:42:59 +0530 Subject: [PATCH] Logistic CMF for yelp dataset. Python scripts for getting PRF from prediction data. --- Project/.classpath | 19 + Project/.project | 17 + Project/bin/logisticCMF/Cell.class | Bin 0 -> 796 bytes Project/bin/logisticCMF/Eval.class | Bin 0 -> 6855 bytes Project/bin/logisticCMF/Rating.class | Bin 0 -> 7198 bytes Project/bin/logisticCMF/Util.class | Bin 0 -> 5089 bytes Project/bin/logisticCMF/codeTest.class | Bin 0 -> 16238 bytes Project/bin/logisticCMF/data.class | Bin 0 -> 20808 bytes Project/bin/logisticCMF/embedding.class | Bin 0 -> 1125 bytes Project/bin/logisticCMF/embeddings.class | Bin 0 -> 5224 bytes Project/bin/logisticCMF/learner.class | Bin 0 -> 5639 bytes Project/bin/logisticCMF/writeDataToFile.class | Bin 0 -> 3295 bytes .../bin/postProcessing/EntityEmbeddings.class | Bin 0 -> 2872 bytes Project/bin/postProcessing/Similarity.class | Bin 0 -> 3794 bytes Project/bin/postProcessing/Util$1.class | Bin 0 -> 1287 bytes Project/bin/postProcessing/Util.class | Bin 0 -> 2750 bytes .../AttributeCategory.class | Bin 0 -> 11193 bytes .../yelpDataProcessing/ProcessYelpJson.class | Bin 0 -> 7695 bytes .../bin/yelpDataProcessing/reviewData.class | Bin 0 -> 5535 bytes .../bin/yelpDataProcessing/reviewJson.class | Bin 0 -> 4854 bytes Project/src/logisticCMF/Cell.java | 22 + Project/src/logisticCMF/Eval.java | 157 ++++ Project/src/logisticCMF/Rating.java | 177 +++++ Project/src/logisticCMF/Util.java | 156 ++++ Project/src/logisticCMF/codeTest.java | 676 ++++++++++++++++++ Project/src/logisticCMF/data.java | 601 ++++++++++++++++ Project/src/logisticCMF/embedding.java | 29 + Project/src/logisticCMF/embeddings.java | 120 ++++ Project/src/logisticCMF/learner.java | 141 ++++ Project/src/logisticCMF/writeDataToFile.java | 33 + .../src/postProcessing/EntityEmbeddings.java | 47 ++ Project/src/postProcessing/Similarity.java | 57 ++ 
Project/src/postProcessing/Util.java | 62 ++ .../yelpDataProcessing/AttributeCategory.java | 311 ++++++++ .../yelpDataProcessing/ProcessYelpJson.java | 220 ++++++ .../src/yelpDataProcessing/reviewData.java | 141 ++++ .../src/yelpDataProcessing/reviewJson.java | 131 ++++ PythonScript/clean_text.py | 78 ++ PythonScript/clean_text.py~ | 78 ++ PythonScript/combineAllPredData.py | 29 + PythonScript/combineAllPredData.py~ | 29 + PythonScript/getPRCurveData.py | 28 + PythonScript/getPRCurveData.py~ | 28 + PythonScript/getPRF.py | 22 + PythonScript/getPRF.py~ | 22 + PythonScript/prf.py | 39 + PythonScript/prf.pyc | Bin 0 -> 1302 bytes PythonScript/prf.py~ | 39 + PythonScript/writePRFTable.py | 36 + PythonScript/writePRFTable.py~ | 36 + README.md | 21 + 51 files changed, 3602 insertions(+) create mode 100644 Project/.classpath create mode 100644 Project/.project create mode 100644 Project/bin/logisticCMF/Cell.class create mode 100644 Project/bin/logisticCMF/Eval.class create mode 100644 Project/bin/logisticCMF/Rating.class create mode 100644 Project/bin/logisticCMF/Util.class create mode 100644 Project/bin/logisticCMF/codeTest.class create mode 100644 Project/bin/logisticCMF/data.class create mode 100644 Project/bin/logisticCMF/embedding.class create mode 100644 Project/bin/logisticCMF/embeddings.class create mode 100644 Project/bin/logisticCMF/learner.class create mode 100644 Project/bin/logisticCMF/writeDataToFile.class create mode 100644 Project/bin/postProcessing/EntityEmbeddings.class create mode 100644 Project/bin/postProcessing/Similarity.class create mode 100644 Project/bin/postProcessing/Util$1.class create mode 100644 Project/bin/postProcessing/Util.class create mode 100644 Project/bin/yelpDataProcessing/AttributeCategory.class create mode 100644 Project/bin/yelpDataProcessing/ProcessYelpJson.class create mode 100644 Project/bin/yelpDataProcessing/reviewData.class create mode 100644 Project/bin/yelpDataProcessing/reviewJson.class create mode 100644 
Project/src/logisticCMF/Cell.java create mode 100644 Project/src/logisticCMF/Eval.java create mode 100644 Project/src/logisticCMF/Rating.java create mode 100644 Project/src/logisticCMF/Util.java create mode 100644 Project/src/logisticCMF/codeTest.java create mode 100644 Project/src/logisticCMF/data.java create mode 100644 Project/src/logisticCMF/embedding.java create mode 100644 Project/src/logisticCMF/embeddings.java create mode 100644 Project/src/logisticCMF/learner.java create mode 100644 Project/src/logisticCMF/writeDataToFile.java create mode 100644 Project/src/postProcessing/EntityEmbeddings.java create mode 100644 Project/src/postProcessing/Similarity.java create mode 100644 Project/src/postProcessing/Util.java create mode 100644 Project/src/yelpDataProcessing/AttributeCategory.java create mode 100644 Project/src/yelpDataProcessing/ProcessYelpJson.java create mode 100644 Project/src/yelpDataProcessing/reviewData.java create mode 100644 Project/src/yelpDataProcessing/reviewJson.java create mode 100644 PythonScript/clean_text.py create mode 100644 PythonScript/clean_text.py~ create mode 100644 PythonScript/combineAllPredData.py create mode 100644 PythonScript/combineAllPredData.py~ create mode 100644 PythonScript/getPRCurveData.py create mode 100644 PythonScript/getPRCurveData.py~ create mode 100644 PythonScript/getPRF.py create mode 100644 PythonScript/getPRF.py~ create mode 100644 PythonScript/prf.py create mode 100644 PythonScript/prf.pyc create mode 100644 PythonScript/prf.py~ create mode 100644 PythonScript/writePRFTable.py create mode 100644 PythonScript/writePRFTable.py~ create mode 100644 README.md diff --git a/Project/.classpath b/Project/.classpath new file mode 100644 index 0000000..e197418 --- /dev/null +++ b/Project/.classpath @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + diff --git a/Project/.project b/Project/.project new file mode 100644 index 0000000..772bcfe --- /dev/null +++ b/Project/.project @@ -0,0 +1,17 @@ + + + 
logisitic_cmf_yelp + + + + + + org.eclipse.jdt.core.javabuilder + + + + + + org.eclipse.jdt.core.javanature + + diff --git a/Project/bin/logisticCMF/Cell.class b/Project/bin/logisticCMF/Cell.class new file mode 100644 index 0000000000000000000000000000000000000000..4da479fe8cdab019c8c92e5a444125532c097e34 GIT binary patch literal 796 zcma)3+iuf95IyTS4RKAAI5qbcTA;+CA>#5xt4a{5PX-=>g2+p?Nvw1$V@vKDsb9i( zAQ38o#0T(Eh_grH?_5XyOpI|2#nvT0Jmf>;_nSd2NHs%Q< zUdZXEc{Yyrl^LXJZu!V80UPWp@#N+s87<8C%vR}HaA*lx_`)oZFEvIl*&+C^w@>+f zhxh&zt26kYi^{9CD+Q(loBcH+Tw~6i_ux8iFfw>I7kHmoU1qLx4zK-ihFk3~@VCEW z{he_-h2z^ZY@U&=Q?#M9NmWA~t7u@2i`zmI+l&nPvpK literal 0 HcmV?d00001 diff --git a/Project/bin/logisticCMF/Eval.class b/Project/bin/logisticCMF/Eval.class new file mode 100644 index 0000000000000000000000000000000000000000..f9ea52493148e754522c941b84447510cd9055f2 GIT binary patch literal 6855 zcmb_g33yc175?w+GkMt)GLbD1mdQc_DA5qMKn#o|VF?65t-MSgVaQ}APG*28t=0{z ztyZNhF4b|(#}i`{EWH``ikYptyd{qKFVhVW^>&*5X{F6Z5I&VT-M z&V70IttXxYaITo|ghikr6zL5P#)5%W?XC3}?$$$2*toh&->uh&^l)$ey6#;@ASU39 z8lg?mxFL|=rgr18V5q)bA7~PAcLsaIdMqC0ZbSC&5_77K>WoE$;ohc<6@NHp^cvBo zWjw@11*;5EY+ER>=>IvZB@*ur8S1D!KHnUO#dT?MY^Q+6h4oRRCo#?ER1H@} zqEY3Yhbz*1`XkWx4%I3Inxj#DsErV2dYZCDIkqGa3WkHRWdc@TRhNKmRiwvoqEw(T zjpQ1Aa3{5UP=YcIUK9y9mZ-$_1k@FDk`5G3;P~=379Z^F? 
zXwb#vAwb4p%!5TZ--(8-gfSIqxB!g|p>YLSNF+;r8MHTZs;O$58%Bk(&wVqB17ABMeaf^nFC0tJCutZBL-lpLa8N1?sz0cTa zg!EW25>64(`slk# zG$(d27@6sn&J-f4_w@L~G+I?eRh7t~Olfs!RhJuW*rlNlBnU}rI(d1pCkOqAI1#4W zbUb;Eh5^aToIoTT(}Ur`wZ@PKQN)~JM9gxe!kVbe8sgY(!WuW$NsE2{1b;HcNS}V} z)i8uB3C}>B@LVtf;WH3Rrofp?@~C|p-i!C~D3%(5ITIX}X++{I>Blu1-j8b~;lr^m zJtW%|DdHyy6rcmq9Z4c6Ogl19&Z-bu-enk>>{t^l$oQwG?N zh-@I)f=ml(UUMK2kLrOTWub3^s84X7)Sa8;Oxr^B=%fiAZK>+=;5OXu#D}wrmf5gm zwC|9$swETNL^BhxgcdS4T4aC@YB+?$bh6JF>Xb1mOcn)ITDe3@+$9Us-B}FehGPY< zAMztBSD8%I`!#$FAD3;P?Vp&~eJ%c~7PJ*;!~SR|%16$OKDWmFiOw3wUV2ZDpF2Xui$T_&w%#{xGV$8je{ z8MMq?3mJMeY(zDT;RO4i5ti+8Wl(3z2$mlMCpCN?$2eOnPk2(pQ&P}j=%Im~x?K9A zhA+t_cO)DdYL=@WKK2z2&)}=HuO~tX%O{A}HvhJ55{k$1bq(LZv*gWSu(zK-j8ThY z`SS`%-U!<8Z4KX%D(o?N=aIO6Ps4S%UXFgC;bz<-M?ccg3$^>4h7aOHw3(_!fX{^% zcE)+KQz}pf5hbRPte9^63eP+7>nzT64h_bPeht6DZ|OorCViqs1tazA<)h3fL+@vA z_?6Iip3RD z^luto!rx^=4h$HQUu-<%jcLjxe3pG|u&3}34gbU|q);qkZjSGQDa&5PYfk)|{81XR zfza?DX|i3x>cZ=c;(BFcGv1U1?=9(0R(VZTwo|;r;w2%*89zZ4%{Ja8tW;RBk!oDR zP7Eqq7pSq52Ua92jWXg@;nsvl8t7C8hL{^RgTZIf&%P%P4VZUDfdp#r;5y|L`2sW3 zu=_(HqgM|#M|=AjgDBLEBG*K){Xnb(6L+>yNzs+&=YbN2H!pjh- z2iyyV!BgIqZ?`Yjk414Qrga&xF#yZ40^q$ zxvll`&Ff|BpIER-o?WSlSzi zRSTUa?6P+`210tkV41OWled#Hw4@#8eZ|umiAMuQYfxI_mI+%YJ-`emyp@1C;gYWp zoH;qOaORRP5m|G-N~GpF+~*5LYA)sZT9Gy9%SG0luNbL$0iSlE5Jg-mrpBwOQ+`&T zM9J3UnDi*hQ=`eL(bUvvdTKNyHL6UFWZC`aj&nQry2nso zTeNT#=N&<9^N^~Q@Q`=Ib9!j>sahi(kPOyISZFLd-Uk&7+rfzF`5vOU@1VZjCA!5ka zDC(7LxQ<%o=Ur#ROp*?m%^=Pp8|D%YAOBSmplY(A2CWQak`Zp%1``I|1e=0n%?Ktl zG{F-HS$KIVOH6HJxZ(u%Sg~aoa}#SR$*U}kadZ?{+qS2~B&p1h74c-Co~)ctpcfLr zbCm@q!PY3?0SUJamdZ-P#D;BJC1!C=K7Diq)2kCE&6i_qO3?^rWX&owX2Xcp(5$=Q zspZcR?2wO|N2P3*0%^15Y4ZOGqy*%nnF5s*sLT|YmnD#PyqZA(8pxIN$RxvK;$IFg3$I@=Q(Vj^Wm%iN3_znI`U^8%W8GH?W-sc93Z6 zX~>2Q1N|uj{kRhcXik!HvuGe+l1&4xTWDY@YwL!*)p`EBMq6H^Bd^hAZSascC-4!0 z6F4HUSQ{4ClxvClwrFtlDx zQq&wa+Psd`m?~3aDxY2Sz<8jF9!LRY%WzG}rgj`>-Ep05$Ct5@CTcf(3ed$Ky#;mr zn@uOSVlTGgUefRwo7B^6P(NdfdIj5g6V!zd-C{Wc;$rlOD`1G5`Dc^+uthwGo#HtJ 
z#S7SFv7*m1l{Nc(^jp>=qKH4hwq&ua!>37iR#!_q9^{Bk+;S0%&H+{mk7YGW$^lY7 z*V2N|a^ymRr5T^29UjzC+e0iyIp`E^c$mAn*emAZ5svb4udrZ*qe9BQj7K>tRw>1L za2jkwR{4>)Eq2V;#Xcq({{&_xnK(#|1+?j~b*XikwZUd>urtqUSd<>Cc??gC;0q&o zdK6zKDO_XtTBFD7DZw`n-3=%26_nOkI=*!zIm<20-S74En%{kmd0vM%k3=igN^|dr z%eHhFcI(no`;_esc3E;tbJ;vfHGWOu|1o^G+Wh@~^%#Cw?adp*kJGVQsqBo>^FdxG zV#GDhQofsgbPqN&YM0_FUL5wZn7TR60+MFvtu$oxD`L;$WHyYkP|XeGpP9)j$Ax<06*u*LyNYv zMI57!R&3-e@G;77W?PYkE>A^emb}U=g4>BUUS;?aB^VuGg?Iy+SRvj-rF=_r;}^+7 z$V-h)RqETA1?j@qHjF)NBI;VVXf3yrov^um1iw6qQ(KAR?~mh8MSmH^i=%jX6#q(W z6#bXWwYE{bF^bd0!ovMpdveC*7i%5K83!-cTFsd}L%66@T^(&$~)#w84*)P#mtU*qhiiO zJlDJgnYP|Ry>~KA4)6whkWKUuFJ_08trZIDI#JC%`Kc2%45EcHd7@U#<8Go`Y5W5Q KZ{BQBqW*sj1aSiZ literal 0 HcmV?d00001 diff --git a/Project/bin/logisticCMF/Rating.class b/Project/bin/logisticCMF/Rating.class new file mode 100644 index 0000000000000000000000000000000000000000..2688baa7eb24b5c5507f36c38f20a311b1575e72 GIT binary patch literal 7198 zcmcIo33yc175?wcn|YJT3y?rUh8M3bL9YG*PsZuRaQR`AE3}C9Ywnb~Tw6<2Ot*w^UrM0%TwVRm!=e;+{OcFo+Dj0op-+kwv zd(Qd)bI!d!^Wme91DLKR>F_8NMdNLeq!nqNyJUVYo*fMXZw*hL`nUK5t(6 zN`>HQ8=K7zD-w?-bre#O?PeriwQyO_7X@u@yu}Q{i(v)|P)wuB%2(4cmfnat%Wl#0U%%sWAq8@XKhNfjk6*D8aBGUym|yG>%ailrY0B4nXth zkV5eQZj`T-C6f%4ppt{zm6m$IwKQN zn>e!*iSVYyqmsKttpuh7rx-X@0Lv3f)LUUI8N?jS(=nIcWntiP-E| zi_9q;SC$hAn_mkX`B$DZZ!)eeE#!NWQOavK6K$r0Tv3jgy`1#5;E2sC=MBv-)o&NmM}Bo7a(jHom%$^9+Tty&3ONVtzc)9=3XjucMy1(<|iU z@|;ve&=25Dg+MB4CZ@JT5<#p&gO1PUl#%*PNy}_E(1^20j5gC+o``pt32T$W#DS>X zKS_gFjdOLJldIM)DF!|V=7dfPrp#rneN<-HGmy$UG)o#E69DO7ST%KOm4K8qtttVg zDu_C?8CZh|nI7)wFk?ia@)OzXhNd=vb)-;L!dxFQHzcQ88_D=OFcLAu88j>IM3NLN zb5YE$l{(HN?3qF|C)L_&Cd`&P76UV(gK1fm`8GciHJ$k&5@-&h1M9`L4NN=NV3smR zf(a1-Bd=;Cs23Qx5T9q-CY>ajG?0I14HUiAxY)oY;)g~Y+5@ zjP{%7rNYr{#K1KomERnXS>Z@5xyalkqSqPt0=BRobflPdCuAGt zBAR8=02sSd0bEaJ2sdd}0AHf5Nh?f}ei=9F_(~3&=EYO2dj@X8&9tW@oJg9^RG)g} zHnQ@0V&^skx8gS1Nm742^IWTfxE*)uxFgdQvYVZ-c{@=rV@2srxJ)M$VDC2YHJd%s z6{L#GgXZ|a#_TRphc;IFg3?hF)K=1EM 
zUz*u5sF9FHNPNFUQ3gIc(J0u+aabz3-FAD8bpu2ROUJGN_LAvTl3;_b4B%1DS4!x< zZQwCHPQ~lYP4yDJ!b}d>+~8DgKAymnI=-7j8h$YFJ$#>9BP{-5D^3SXa@H<%=c!;n zp4RbHCRJ?BnXd+Zh-Vmpn7NU==Vo9QQqLNA4nLONV%AEjF?QEqsN*Mny1sPSAP!(y z06(LX)ABfe!N4yNB%cyylt-v|j69V2mkj(8r}Nlh#;l07iL*(We#OAAu#<7M5-Dp< z9{bj-27V)gnv^DS=QRVb<99@OO*pyKwwz}WyEp#cz#pXdm)fbkBEfW$rbU}0tY_R? z4hU}=cuR;fTn-3zJuO?jk;6HK%(mQ1D zXpAG$Z@HP^Q7|AU40o6bZ-&Uz7(<)H^n%=@O+sJ(2n+e+a67jfi~qHZx$+xImzd{S z5<>}%c=se9D#(0S`BD*b^0S97N);GtkQz*9n1OagBQXiH4Tr+4>`LY)N>-kK%}W13RK3{@<97WLVa`^uW(hANdc61vQi5r!HmOXMJxWhTi&>M~HO ziT|ZYXT?Klff}z+^Z%MI$7_!@T^=*!aXg7rA5SHkO=(-q4TnWjrGj8QIqKyl#mirw ziuu&}=0SiXo{c$%oT>SgnF0*rx50eV_%;MZ96e{R16Eh;!O#l*F_i2^X+;-CH0;5s z-56alc{j#Z>_Vxx3niQzUoS6O_KPq3#ozNXIgJSwWu;mg6CdG59#rr-mF9qkXtr5{ zFr4O$qABGlL^TbXi=kMKVK@`TI2$2MrgR0RX&|PclD{&VO4pP?t!f#7C6N0Ckigg{#wGy5Ar8rmy0;=TJ zLotEX&)?+^u$~D7wt#bQb)l}IXvJQv+>5jKpsA=OjaItVTC|prG@?to(9zI^M8lLl zu=XIe)GKlEs~TO^1iaOGJFu7AH?hjBc*h~SI7Ilas^2Z`jHy!!|dJDq^`5b-USh~x1z?jx#S%Uy|g3XsbWD27{R(P_{;Yp3K!dqYE z;*_BwNB<=;h&e|eaOA}ZHHNE$M0B(oi4Sd}jbm5q=18YKJV)9@TZ3yA+YA3TBc?q3 zYS6kSV2J?u&8^O5G-p0&~&kjx$>n|VCfEASLssjz>+DXi#M(Egr}Ok zXzq*8=AxD%ZOE6+MSrp1cQ*!COb%%wANxRB8O478otYWUozZ(|G)E?oHN%|@8e&uH zyo`lIj+li*`&r1DJ{EGOkA*|~Sy*z!EDZItkTZQOJurrS1FdZ39tvYAY6 zVRD&Ri&kFq+IU%y;4;2#L6n!+c2@W}?%|i6l1 z^Hnu2P;+phTEGkc3jY6t)wo1$z@=&{ukH`waIrOC`*DSO9#^WDcwIk;tJT}M zM!iqjZr;hIYiyCS>VccCA@znD#e`F|^Hn$qe&^3z97jz+35#2GFVZ==5djq+~yx155e`(8)F>oq~4yvvg9jpU(c0LU!l;JVuHiuIIsI zD^}nJTtr&miyQGgj~gH0W}9#eY?(uu{(wU+;lPp5j9lbMXeL*OLuvxkK~C6*)KMJy z?9tI2@j4+FrB4sUkRt#@U!8{oh`u^M9*A2B#BBuPb^>t+f!IzU?jjI(A9f&k{rvPm ulpFyd`szF!K=jr5@j%=|Aa)Rl`v}DS1mXb#@gRYC=&%EE4D-XICjJjKht5C% literal 0 HcmV?d00001 diff --git a/Project/bin/logisticCMF/Util.class b/Project/bin/logisticCMF/Util.class new file mode 100644 index 0000000000000000000000000000000000000000..3815de805d5d772a4eedc7a5dc59dcd8030c9915 GIT binary patch literal 5089 zcmcgvTXhFcAUQL5pV4m)FYjSTi$xo)#JZAk{%q?z7s=4^Ra 
zljR6uj<||4v@>82i&f#lnCH#?meZO|r`BW~+p%cXP!l-Ls$9+->mV2{?QOMz`Ss@T zK*Dmkzbs+*XG~`#$27W&Ze1}YY^#+{x2!DgZiO2`VVrPscBY?}5!|L>#w5aC=yFtH zPC2TvFoN5O&7?s-X9IWOPF8~F%9&0!r(t$Y@DT0Ze9JV{pe}-1LNw{dUo}vV24a}8 z_Oj$NeGdU5FvZ0MtdUcep%Hf(Sc0V*$}NF+>_Iz4gl7eS%qQC-XSv(J=dqlOVNqyT z)9$4^HP}5qg#thc9L)w=u!6hI6j?dTD@Jf{?M^xBN(1-dehrZUbGS>LiVWQ;@mC9B zS~bLig(N%Dq`RHXwA5}d#~Q4QU@iSmLN#eIY+yaw>3Vozq_0n?c}LKdVp8DrUbl$b zDr_{+fllcvdmlG4r}onOd)&^DgUOLEvoPN;g=!uMMP`ApJUPqKoAEu*+gKitfXM$&H^Z3+#Gu1}_DQ{2$2DoTJCY#59h+I?B^0q3(V>Zr~d@OwBfj zhO7*muX1YZy2hS=cKUuiVc?rMN+LR0cd<}fJpbi*3dbUN+E=)#2Ql!BBo-Nxv!ydp zJjWN(QZkR?dFd5amoN059I%qR*{La#A?GAa-c4gSK$o}80KRSDWmQWjXWAJlv&^0_ z@Ew_rjGAeG?iB;ylesAG569(06<#&)0}-QWGMix~Gs98*P(ysj)=gdO;;oxH+Tw}s zRa?8`iOn7D-3{?}X;YQ;k2RDuDBMp9sz1@t7*sMJcekzqrca{#6n<*pX97%E0Tac~ zNjEn_9sm{Vl!p0Tn?55Q61*>M_LIWN>`2C0BPy=CeQr?J@?A}VywL?(wBc*FM{%BM zT1!+q6T~$hx0b4cl~_xNghgCu*z8j~X{&o$Rc{g#rF)a{1M)FBDM@WBns3phiuovN zDA~Wul-W~m{#V%f@i<@Q1W~*%YnHe6fxBj@HPPr_8~6=A;%v?u%#K>CL~d_*@9d1? z6S+l~#6@(K_uo>16ih?SMI$rDntr zKfS!YO?=8~xNZWo3%ATtTMX9#a}lFq89s-3bX=*N&6fnUW|a<*zJ#it3B;TAx^XOM zEdSwLp3pd92<`8o{6sPv2kpkKzDO#tn@9$*VXZ=86C{3jCoZtue} z37{N(Jt1R`*oLKi%4j=wsI#Tani|_z=xP=o;@{M>=yXQ7W-NFmp9zZ9dg z?i~8Zkv_ubU`W9=bOAXHN3zG>t%2-;`yBLd>%Z5(M#ChQ}M_I z9xe1cjzi-(GL9#Gqh~!MLeE;c!LKv=mTM%L=GShV`Kf&36$-jKegfBH0~&bz#=s#C z<`3}#UUUx;s^<>!Uc2lOtRr-9XeC3vBs2@3L_8MG(@d|A>10`DfMr}%Ec+X`9RJ_9 zeAnA@U-2m8`2KO+?j!daL1FAe6+ars>k8C2^pz)Zlk8PVT%LadRlfC`MXY%=|Cs;x zBQ}4=5%Wp^!b{qv%+%Ap&(p;UPv;tny0{&Cm>mgpv2QoQz-BDO7XG-{%uoLWJF=U1 zc>+0h=TW|$U~ishcTTV~FLLe!cI8Lx$iK7i{()Zn6G<(Cls1>oYV=Wy`n8RGZsVun zAa-dFW4HD^(%Op{)L!P?YZ%gA=a=6*>=aRjKK`yHu^2%*C#s!S3P3T|&NA8#_L`xc z##5jl+=yT%aN$UR!qM}VWlbzP!lxX zN}>vtvnpq`Tw@#S`FT`bqkO(Tv&HksYhA&J-mX( zi03}sjfc?2aTgw81s+v3akbU)lsZbKew(^0*H#J_I4a@JxWYB0aE+_$QS3#FU3)>; zweO|9U_D0^IEFA4m{{i}xQ+l_#HBo7?+9Qe4-hb3D_V^Yl=tjXfKhUm1UsccNlpN= zMl}({%y0jzK4zDRnTJ!wFNs-W5Hl}Yv+fVVgm*!6-W8q-zn9j65+z|^iqYroZn?4S1x_aHF$>!d`TMjrx^d4sc 
z4pZfiU^$+^I*zyCNsQuY9KbQoJcC#8EKcD$yvOlXynugE@&ApNcn2QW7V&Fd8g#2$ z2mH>xwzV4hqF0R+$ES)6)7kvL=P1Nj)n2+H* zwJzYki~B~1;!51L)!J5DYh7BkrCQs5YU@_}{qB2jwqz2epSJ&eEaAR;?>qOL-#PcZ z-#zc~{MQdX3Sg8x?}aAl*U(%aZjFX(C(fKwQrlb?nipz~dSM9qEDx>>mNW#L>Pu$V zEDzO21x6&;R42%tVGp%M!wn^K+0)!uCh*OiJhgnn!g-SgQYpxq*xb|_4K_uqgAHvV zlPdy!0L~j*+YoLFN5={DVFlF`I+3e-(I>WIl2|`-kZYkYvIL&7_FWkik!Qh#N02ik z+!UJC)>spY%nQ~ugp_8@wZVq!U?i-r?M^>*VtNZw+yQ+pe3ez*{41fof&Fuj;xs!iUz|Ct$yU=ATI_B_DMiBx2-7}ZVXu%f`bKq z-sARA?O_EDa5koHcUcAw!7wiljhSx`R;_7`h8istppY;%w-Nh1=iYF0$(%^IDOwec zgo2G_UJMuPmspXp;U#AQDVjmiqk37qd2AL1Pc?DqW+~6>geeMV{kzJ{MJxiY?W{;Tdrdr2&P(? zh6;vbwAnG7TOq|iE*ySL$4oC~2>RM6r{Zs6mcr57qFmC@#8$-dKc^?D9rLxoY6t}* zP328>YMTiTAC?l2b-`$`tYQ&=7o_xu7~*wxbZjdfK95nH9K?x1KdMlzT)Ti%(n&6; z_F^GbOTOJ!C=~JIIOM7y$6Gi7CvpSFG_8s@x6sDvX$1D9p6L4u)#t-mN~mhUCs=)2xjhuUP&&AMZ42w=NyVFQ;RRR zutH^=4BM+}A!;^>u2K0VStZ+DOn1}ym!agXX=`239N{LAMa&8A)vQo`Rj{!typ{&} z+FC;qXG(6YnLI9>(?4a2gmbycgo@;)47<+bm>HU2M^rY`LS1>3X`rT!XbK9`Y^h0; zx<;65ihuIz+E7b0%-rq8X-P3(F*`ou#fC1fQ8)Xs9=Sf8;Tk!jF3gZR1e+|Jh0V+( zOoVeH%`KrwbdBJUG&*(98g#}vIM0i7Q&Xdz;Vk?R=M!=|r!6#lX%~^s^QtV3NFOes zYf46pC{Ye=4Mj_o@+Cf8NF7V)GD0^Zx|%__9v55qF)m@aDM%;SLxMxosGcUQm_*m( zG7Fbut023zr6C-h*xXQ86%9tX?LEULO{!3dc0I1Ja3!vy+wD2?BEfKzN)6S)hA=gw zBj{SrRu4+gQc4I#;?G~_Nnyjm08miO2}NodCX-ZJqY&pZqc*Xy8E&czt!5$*MyW;? z)u~;!HcI-?HMnm<@k+6!LbjEf`O$t8d_1j$dcy|oGJ1pFZyO@dXh-|8x*xcCC z5Q>J}1=c7B2!^G>vmza!T%d)?H8gKoB-F|zpw|XzLZ%Wvmq0N$`fxuJY)K@vG8|gf z8dGD3g?8md+c;;Yvogs{l>5!;x9@TKi_&SdXc2vovh`fUcX3O`mRu4ItyWQ98(aGk z3mrIIx%n{*kK+kGV{99$$x>>TOeRx^uCbGtF^u8hLnobFQ@n~fMBVe0g{RfiC8sgC zA~d;4<&BAUTP? z`S4Rdi{kkxUu}YXdlw8R6Pb>eE&NQ`Xxh2fkDue0Ui_jHvcWWXe1>+xF)Z>rh$ z_ED3SMsHhq2ft(52s41gwJAF}Mp(MYN_wg!7Em63mtq)%p%fszct4hEQ|0*ao_Z(? 
z_E;E%EFV4)964m~f92=<@MpoO{QTm4`l48cQvSCSqg42dg}&4h3DTbwBR3#sV7bh>M^UvKDY3N!rD~v-l}@OvhYteJ45wMY$lUZ zz5lY%ihpoJh*+YDP6WBCa!&cuXed&pax3$v8{nw`+H7{cn3j0#C%W3SjJ_zjyNJ)i zv6$-wpVE9faF+O6$wW}YPEUIQ^E})OLBxql^nZ!P9#(pu645XvFUbL zS7Kh)Jxo)2k{_WjeVOj6P|qZxyAHD z0WD<1UcSg}tHq0Ht=+FKN(AFl)}qOKyQ0cBN+69MMKkTDy(-TU?v+~VC>rgKsmr4U zr7q6O($LS0_8<)9^>Ndm|bowzyVvGiSaSD{^apQfKj3@jwK^7`o13npw_QgHl|_E! zGHB}HQ@lKje8VM1c2_;MCSPO)qP5lT2*JAChjRON==t6Png^o=?u(wxI5!zfDh1WJW!M&Q<5@*97}3P zoGmyY=~fn4A_{@zdv9ipt9xT(OxOM0Tc(8?>Snh^lV@*dz`5P_Dz{sB0WUI28_CsF z4P$XSPU)4qV|2+r0>w|JKDn16xy)I|C-?KgGI`Q8o+4$3CGCo@ctR()1y$uvq{EV1 zaJWw%VtNY3y`e@Pw&aIus+s^+#xZAcLcl@#2v>j*^`FqnK#XGCc5sWK;9`@_pN_n6)5$3DEWkj0*C z_LOj@`jxp0ISY4UpZn48US4Y0mp}dXU?%^~!hUuycpD3<@tc_**l!l@ME}HX9=kKc zzQuATAV57p9a#BuZ$1o$E#5PzAN-AX+Tzus&fPglcYAOEbyjzi#=3W(=FXecfdi-C zhoQSrv@o}LCrS$2F|q@r+c740Y&*vBvfRC#n9FWK5 zZefG)66}le%&o`?cn>Ye^R(mATi{pic5DlHmCd>xYWq_*FpMh~(AGlMkBf+PF{biL zp+C+Zuf)~3hF8=9*Wxao=I2L%2Y0 zvz#b9#9n0*e@-UkQ)ui|?y6}tw}Sh;lE0_33O>U&e44!rtK-h9#&fo_sAYVIRonYY zg-o>_k7|ci+wrJEY$l#31inP<9mCjDse+HGt`KA`X1MOd7~P_>!%y1rQc(w9DS80E z;@>wq@RsWSJ-dt9UCeI&Ro%Pr9=GBjO0#3g<#gbWv3&R^Cm)W>{V)buWq_gjQ6Rgh z9e-O8$PO6l-}tS8oPhBFKHY&&oz`bN@R`&4atFS2T3_$L*KB3C3j#UqVgz#f49YX} zJbB(cUx#>0`{bG1aB!Y?8xB++3}iWpp|51Paak1T(;=BEG}F!6Cv}!r&7#0Avihc* zwYBT4yp&mKaOo@iIa8R7<3Oe&H2gmgR3Js|o`7QH+xgXnvj?CKNS)+hSIRqSv?@c1a9M)BC8uh41=UuX%QOXOz9b+|3WAo zmZEk~p)@jeRu4cOkvgk~P$KdACZNVRQ_=wCW~J{rqQUtEARNhY7t%P~0_12S>uzt`es7B4JvJgtQS{=}h@X$9&I$ zoDSU?y;wFY#WEG-=_JhQMZ(-v2|f5{UM~`=QzbY$bo0+~y+~M;B7qMp6(PPn?^fBM zOj@Ucc6DKDWFGI_6EF;w2v6)XTH}lsr5&tu28X8|T$X_xz~!k;*X5V>y8QCq zmT%~7`KENsZ|$~xORvjEdRso)+wv<@mRE;ELmg4H+i)A(b4lo`co>{Vk~p6Sv}zv8 z7LZIX#8Fs8&U`$HKl|7@Vyw#yMID=V}{qo^~eA z*RI8nwC%V+y9-;i9k@_?1Q%&f<6`Y){8)P(muTzZ^m`{4Y*#v88_;?ag+W6Zq{GME&6M?ReuxP_4jd`{ukV? 
ze~vp09d{a8xXaiNcN>Fnk1+)I8b{zhV;t@`7U2P-4m*quXgAKqPUB{D828{o<3&7V zyo8615AcZb2_7{*$74M6J#KpOgqefgW`A^=2jNL`IG!>`<7sm|o-wE5S#t)SGf%+t zW<6do&%lf3HTbEy9WR+r;brq3ykdTUpL_WF&@%+DdW!Lyrvkt7gz&m&8Gh?I6K{IX z$6KC@@U~|w-tk%?pT~7zz^X$g^o)_^4&#U;O=WXoq`~iRRe25P`|HOx0 zFaGQuf{(n%;IG~a{LLG}|9Cgz6YqKW)O!p5?(M{9-k;!e?;d>NGx3$rhp&CP_?J(; zQYqz&rUjz;qN4jQ6w`Nwczh3v*Y~RUd>>0jMjy${*k7!SF_M)rU$QeIl9O?P^tBH# zcR1&X*a0SEi>&5xge3yD*4U>_)mqCI4=`+{)UaXQ?CmWDNWo?|bncPflJ`4IMi4OSIcR?@nsJ@+Hb+e|{Znlv`io zH0KX8ROPelk;^xjN3JYZuiAS!a>b4W{@4L2GtC(y`E2D^XDh$MV*H7-RlwLO=agm! zG6Q~fYRXinrp(k+lcn-&(s?MO`>Dz5erk%(N}QVPSxneTr>6L<#HlH1R*dS#XC+Qe zYE~MH)18|9G%d+?har4N5G2k+y+M#T4<#8FJ2=LTOPq(2WOaieaUQZENOvCUPGY-2 z;E>qw2!aGXO;Wp?aS00A8w3fOnKY{#1PLnHBLv;)VtNR^(_lyt!QPNaki8@uyWy1} zbV;(hA(0?$HWIxNvv`;#5i{Q860`Vdf|#WlOc1j)g9&1$2D=io_yS49ERB!^F-tR; zAZBR>6T~cO`8Y9)FP}ur(g;ZqvowPVVwPqwLCn-(oS4bEBv>G2^2vU%q(AnP{gE#N zP#_2J1Lp%VRr2`>?qF2OL0Bw9uv88v%^FHtbqMLvq5KqZ7`96RKL9JlOHzbi%W!-u z#rT(ukOBN=V2~Uxr81H)L`TUqIf5^=kCbW|Ehq9Ne^|yy3*YXyamHCvCR=2z+`(7$ z?J`bY;rsbFWW0PP<(eiFv_Uda8!VHxAIKDKicHm}%QS7SRA>vOQVYs-twCmJtuj+v zEwi+9WVUv(%+co&B&4sDrTRLl*H4#a`o$8~uaf2ZgVLaPN~8XkH0yh$#V{pe{%ofew$DfquyNK~A9gy6qIArkeX-jGPtgnC0FQTIub)eVV6ZO2C9|0%>2 zmX-)bjKTc3CeMUeKxLwhb{KYs@{zew@Fyh%by6EiSt=NQeR0V zvkA^hWfS-B1)JcfL0#E|97$@TKC3vVube-lu%BGOKZf2R7tSo?8`XJ?Qtg=dHnn&F-td6W_jbH zj6AP9OYI5gOOy%fe_d*sFfq7&ZY|QypAWNC5D5cTU5!;(Ozjaw=d(+@oAWLj- zn*8Rb)b(K-`q`fIvZlk2sd@wR+8GMDWFQmR_6+9r-wI53);EBuW_1PgP=YxOSy}=5 zYJ~)|h+q~I%n=0haDq9KV2&b~M-a@T2<8}qc{IT+C78|^89jlS#WduA*_V}KMYQ(@ zOn!aV8<;os1ZJ<1`F(>qkzh_Dn3D}i{=Ml{L z1hbl89&cxt?*~kN8`urZ$t-EAU9h#U+#1{FX`-{2t?q93+rBwMQ)l)TrAafpM(PrR zdlI*OklVfnL$z9r(3YZHtH&g5Ip%09ut00T3aydAwqUJxGB#=vzAI?uJA$=I;BJj? 
z`%B{6{_Oa+Ki!Sgp?t%kj)Vr^rL1tb{Q^vIxBW)+k8k@sNA{* zLVe_nLSy92zH$%$-Zx`78}0JIR;c8*lUEPMuXge3k@(ePym}&j)v3U6z8M;=PP5u+ z1Y&*3bI(2XEB~*Y zGd%hE0}m0=IHO;H45pISRC7yvrloQE{8^)$;+c4Wyr?}hz9BxkHJ)r9ebSmU6O9?B zP$nI3NzOnErpP??z9ZApI(lk49p5|;L)4?qqL$`lJkya*FjeMjGjX1Ky6K75*80il z7tAEuGkWO2hIp$kdG(i>PN<6Msg5L;QS2Iae!LBx>vC%*>PNNqMVWL(~!(0 zniFX?YbcS-v}86nG_^CuT!T0}vW8OIT$IS<8F{iEXk#kfWUVi-rlWls9+_sjhQvzq z^maA)qTz!b?TNG=(pmK(#PcQKbxf zv`M8@D5YaeD$}L0CiS7h03FLzp6%(dFHGZf-{Vb+kYD;vFv+J&^(KXNX_84HT{^*} z0$rMFlBwsIZc;J%12prH)q!PUnn9H^%^Z^&XfB8tZvv?!qcp8u`cq%I62#4{YXS4h za+IJ68U;?Ek6SfVtuTaVfRq?@acZWLR?LbadbL)HRJl}Eve2YOv=}RCPC#2CqZ%wx zajzvn7`QM?F^n0a<+L(DD|!Kf3~bV=;&Jq3gr6j?NP4!+LXg=FAlE7sKoB z^_5ElbT-owYGfBBrggNmV%i{qQ~ouz@l2*ROl@?Qd~7gjBW*%`ysa&fgs{}+A%mM| zdM(zF^)kdcCY?)L1T|CoB~zbKhjytC()mmjqzfTMu`sQoi%hzhI+=X!ZLKYb;Xvi7 zF4?u{Abr1K(l(jY3rJlbq)V9!fOJst_I8MbwD^`u-xeM4C(iDOx3-7ra=Icw-^r1b z8L1AazDZZoRT!Wxo^DU*s>3i~Mi_Dl{a;SsHR&3<7M)-Y;vx!|Rivyp?dzrSPLp=g z_1I`8-H};`?XL_&`ENAoBsy7G`aP3wRumL@6r7UETTHr@`lC~0YpOjFqT8q|K;O?< z{-Vw8nZ$aNZl@nW!%`ihVR{7+ij!e$GN4!@z8-8^MR%I?L;4YuShSABNo$!#xLK|3 zYJFb5{TQQH#43c@_lUgQ%QP@olY2|W{nQ{tasi2kChbpa%55pI$WScS0?nT9Qog?F=U?1&yg>23}hGiHXgkfBz5> z{7=!X8E!e#DhDC0qW4UCpZ*2*w23f}&m*8N4a+04R+TLB1Cu^fl-nqKlbTyZtTyHo zD{J*ioo!k{6wCEwij9^70MYB4>o;s})4l_&`Jv4Z>t!i;wl*$aQAs+n7V^Jtv3RwM z!7!#GL4$<=mt?buVvs$M%r&(gwGhHYZIFF}<{bie1stGv0vyb-qL65*e=o=d5Ek*- z&@#w{kPmDzjdxrO{JwlZqQ z4Co#o$Hxb_4jQKHkP_VKsn%9ROyaGWJf0_jUhV7PkBAFA+)^;rPz6n{ZChTs^5eMP z{SbNJ1xf1)qsZwQww?X%Qq3toLKN*ix8h z^6UW5g7~|g%_WHAOr9eUUIe#M`YC5K0WQ_JSM0df^$j+lIJzqG1b>)MGaeRW{D}XmY>eKb3*vy5NK&0l(GoWVPiozgRPE260z2c(;bOfaM8ery8J705em9y z1<;Ds@+b?qE^o)&^(wD_aVGOX!an|#g!(6v4DDIB0Vl4&{ zK;B^TM$5rW#M40r5|$`%sc=twF2E!!v^lTN0~m|wyDCwWPHbpNY?L9?!bRB|8sLz4 z>UTYfcy}lOs$f7jdC)f*3Hxhtc zcV0)Ux{(-*+1eACCAyghGr@LX3dPY4)3xJSkW6e`n6RHLpHTqB(~<$lj&jM-;q`rh z*tV%jByrZawI(u&=^#vVD!o~#3N)}}T}h}#iw2j_T2%-K87|L|Y)GQhAG1>F_3?}o zsRnrusIsQEF`j`_fdE63{&tgpAb{_0O(dH$>);3ryM72DtUyay4TzSt!9db>tWx+9 
z2V$$-SUB4dvWo|y0={%3wIM;!c;*ZR1E754bW9HnMht+qeX{+h!+NzKhLz0GJQoSS z$nq+Da8NCe#%gN}IcVx=OelecZNp08or>DFDRU6kaw+B$#Awf?+NNRy+Bb6~J;;xU zcETGEGU5k@>(2Z$lYcG_s0l=tbXh|Zs}1twh|smfFGw9-ky={;X@SDDD?wF0iV{;r zl7%9#sk0I?w^zIZJa4p!RRGh70WBpfbK{>4iVeLg^U%OW%fpGIAiT%-uU9F^zeRXp zty*c6h=3^Db4-J+`ixR$l^3jl_ANz@ZH;R40@|FM%1A$GyHdN&wL+H)!DZQgkbj5q zrK-IG#IKO1n^BLrlM(f7PX-jt`6KeHm=*}RjaP@VQ-w>ayOd_k3rIBK;*gkpvXOrLLzu>j% zuY1aL=$(z19LuHLx=^aIaj^wTbN`n(rUrJ8mGv63aA4ttI2ff6CRwJ^TuY>U!0pPV zL4FsWrzYgamdv_}W?&Pbx+1eKo&n1c&V?$JNGj_3+O>!0K{6@Gh~$Rn@0zT@u928e zHYP*|0JuWWnTj=v4?+GA!oJ4xg_%D#`4j#WMp3iR;?*$s9!2c)!C;U-$50(=D5g_; zb@`tvMZTH9cMx(xq8V=ahQy+{pf^;v%9_$8vh}t>%S9=$V)9pRoY%X75E@?7@ELv} zPje!P#prb~&F;1HJjBYgshm8pkR59*Puiol!)ex1j<_aU3xv!*%t%Ro=KNl>|L@WL zD=k{+Vwy&wQ6#Py4jd4hO56k)N1WkX3zG#*@U*4ig$*>yOrzZBgO-x5Q7t6~L&K1C zO|6F|#eNs1IuQ9tp2YIvjuz#jBMAc`Kq|mD();)f;K@Mh6U}-(Bk7;>9tr=PXQcg| z=OT=PVALU4Ta^0a%>XTiaA2bQay51?Z+}v?k49A;q^cGBsAezK zW{ab<#W5(3Lxbb?QeCzIA8MO#HN_~Vw*?MaFFa68btLpm}Y@=r%(mbt4C5Z$dIO^=sX%m-$bjcL9ClV zq#w~JdXOp=xlBzQI+5mKTp`?ii*S`7oPoM~zBb{i@2AmwX>=97$Lyss@?9qc#docy zyhY=$OIWMh*CbYT9%9wdc&eo-kgPd0nob7!PNgx(agU|5kzn0I$0C`399>0qw408n zyU|)?tFI=AMJ$kp7JzJUuOx4-$wnO(*(L}JJ>7K5BeVn?U#5$z4$$caDfW7^?{v|6 zm*sVlQrdcOdm+syhbe3i{~a$9Ah)oy%05T9Hg}?4$`_6 z)%)noeYAeTL26r3mflC1nr`aorp+;L*?BR)r!LS%mqmSL7j)CsSTGuV$hVu$iv~So zyXlg$Z+6r6om3nRdgU?Z@1ix75aX+3X4H&&_tK82iS0-IBG$SX z(#3w^;%j$Nz}rP$&tAH&Dr$Dq4WQa&Da>Bh8Q|J(x@kqs_jpeVrQP?^9$9wOyPxjB z%FFJ;(j$Rxx*O~7ru&3$9t8)dLnjE#EupE9=jo`!DJ0FL8k!BJ&H*(WK)<=P46?o& zENlh)I>5j!(41|w5NuzBla9so17!d1qovf1zK=koenzL!>vX!}L#L*z!-qF$KV1vD zgVuBaB?GfRM-R|JtZ67c27S90e5s-B^b?eP_#A1KM$kj}b1guu60IM`OaZKND?N%{ zL9A~n{Sb-V?yN5Jp(ugE7CnrGb{*0}$#xFIF$Jf)tNC7~BV2m(%gB!rgBA<@_#6)f}K- zF*dt=p2gR)--4;olpSDe*>hSfyXghd9dwC#yJ%$8Yjs++!0J?M$wIchwD~>LuAb-7 z1f8$zrk637s=7QYBSS%3qBu6N1`6K@+BV^{k=9cJ=Sgw85a=l?U!inxtHoz&t@Uj# z%*}s*FCN8%apdiRbp;K@v_4JLgF$_Y+{@)z)g9GC<*p`Z`YM>(OJzBY#(qqE{Xu#S zD*FchwymhPw)tmGmVNY>ee~D)meE;YQ8ni4q7_k}P1l&WHtMy2rY` 
zX+!PVIPYx5sbmI+jU9@b$0{+n-qD)tEv*?!o9Scv1jML9uTSYS&~+MkBDD6>=a@t6 zlz!J^(Y%2?J-}yNJ%H3g?q3U_fkphmxz&(W>qXx7h?2;bRVab>7lvld`(eyf7O2ax zNTN-UfX%QG=fL!ytG26+>riXEjA;&mXzwnN1SXyDvbos{s$*VHodC1&sGJQKWCuAG z!nw7luAqyq?DVsZ+rV7~*yRkn0>mB}H8o;~YNMe8>^H!Uaulrz4{*o>e75_9RUeF@ z`-Ghr0A&m|%6cJ4U#P|_GUyQwM8o|GFW=9l)@YbnBlnq66nEDi;J!=;7#Z$ZVHe#J zEwmLmX4Xc{1FQ#ewJPqSbhOxMRpe?FEwb8|b@PC_65HIAMoXhXF*l`RZj=ep#Uf>H zN+TtK-7ae*cBWL`RLR#3ur@NC67ub`JXKn0!Av<10pWWY8?6G{A^b$=15PdgBwPsh zYb(5ni{Ol1Op~b-P<;sm`5Uwnrlk>XSduP9>u=%k@!Nop%cu*b`{+CLE3|(RrspNN zX&=%K9!%fm@pKJO#i9Njx{epoPClJ>aT4eDo9G5UpKj!>X!k9;n!igo^N--E2w*(| zkbyMWDu$9}m{8J&NpfmO!hCrsik@PkyjbbwmN|Nk{@p4KKBp*kFLl&ityB5$Fmu4ga1WO0vP>KZkXSm_`Y)zWnY$BU(E9bjh3nlG zdajVdYS{x`71`*qDv)0_9Tn^r7%yypWbQ^eQJqkCK7 zJ@A!oLk#nM0B09smD?f6U2w_ofY*B`$^^$;?2Oy%)k0kYpGgGRKrfFHTMyiN(<((! ztNpHi7+E|kAH3qhN^Jdzl?EuW4d7irCAL9j%O{f2gN`AagCb)1-x3{2SP!PlzJ*e_ z-hpIec^qsNyl*Z7DEP7NYs5g`OT+f_v7k;DZHf4{DVxo)ZK*yyX=G(%Grm&S-9Y_O zKVIH}P!c*U7li7f*}ZyHy4%MNZGQx+CibodD|+p7XO@7@^pC7ij!raN=&N zMqo1yihC-qaHMfJ<{PLNpb$<}DN}ci4KDzw6X27ADkx1s$pc{dCX`udE#7IEe4j$Q z$BZsFK$~eiUDL)W!8j(~@6)TS+|M&q`~j&^ON7T4?xOl=*l}k+-s=2xb_9*+oaeLRx-l ztN=}7(E?>XU^nM8bp|;SjD&ahyp(l;79yj8>;g9P5bXHFAkZTSFdju9{usiHpCUf| zIb8UM;mAA=m*|(&PEWupJPGuFN}>Msa1U>zU*jIiGa&O*2sVDB2;Bx01$i7o?{f&f zk55*F##=r`5!$3Xc_A!>A1Q&`c`@n&Sj%m^1f?KWRHKkJgt=$%QeFmjcjEM9Io^hW z)_v650)@L0;p)*=U^Lx+CE5!sPm@4nuo8Wb2kI~b{)~l>wK=s}cPo@rZ?_(wQ;Q)W zK0_cKr`ov(x);k3mui-9NplabysHF0F6g4ZwKlWnOG5z7>}9A@jDIuRH7n`;l|DX) zEk6(Bcmam`c^K;7A-?;)BD(hWXE{Wl#j8|+UkMV3FKQ@4^bv#@LSBUs77fa<{?lNf z4QvvDH=m&p;8Q&N@%(Wm zIw{NTu?pAUMP&UC1i}9atNl;p9^Qik{4Zo!J^-gbq$}tnWLQ1{rQZeYeeUAuvkpg} z<%HrWXvJ$`RScmTH!H_A0xPAtHwXc0}pv3zqxf4t!(q8AbN=5apO~F|%m4Z0S>4!xEInic2iM3CvIm!4y!erb zk0!Gpr^!Cr%3<2Z1#}sk2>S{V))mn~E(S6c(dS&sUR*^C0V#9w`T4ex}C#Vq71CQD& zAwaWBd+n0rDCjZdkVlUp`XyR5oPtjN%cWAE(Dd=Hzo5hUd;vPcRrWps9Vvm`oMY)m z%xfKn#C&$aZx9^Rz1 zfu)6;ZKd9$0e3&0s*ie+4bVxGK2mlgWTk%TAY)xoq{zFvu2`xg#SYxc!jw<48mgGp zZ>)K*_+iDk9-q4s8_f)o9?YR07vkc6Wfc;`qT?(8Wj| 
z4cbX^pkkA;rn7*ZRV@eY0?|T?mIwG^$sHjjq`YBglxLkRNvC8<;4xb%d8-~)g1^DE zpSR()O4~>Mss;`9qfHjHa_Rzh6OCfd9NT4%Aexo)w}EyM`2Q$gE=A<7T!#TVKeI~` zl{}O}d=!=OFctI-;}JBPN78H_g^OL4`1MN_org@rm0W|YN-ezUqvJFJkN;od}B5*P-?02HO3s<>=|?T3K%h%ER?Q931$zatKg@5C}f<# zS3_yNRA$ui4rFf-&>FRT4N86*W(?zNQ3}vVLx;QY{f)tV9YzV!Xw143r7(=~Wy*NU z<>m{p`)e`c!xZ6Ncv}PoF6Qe|Du#JJQ@t%f?&Sr(0d)n?*E+rt5rm1g^;e@5DmakD zu3lY{v$~s{)!pQ*?j{OYW2}UmAYk!~I9m1cD>dXtaS0GfohABz$(LHTIIR*EhrZWF z84}{vO{9wl!jNenbMh z^;w+nBaj1rurZ}q+C-pE9UcqR=|UFjbk_G)h&)cf)InXyQ-{F#(P&T~(iI4{sdTS( za&0AG3sm~HoNtqpJY;iM#7Z8jEB4eCbh;;U}q6WiXz3kd$vk&f&6gr@g5bE z>;?=8B)D_L!`(Sz3}h#X#g!en2Z<%XRN(=Qt1k+|xI5qB?u<=G0Va}rF6p96q9vA@ zSQRVAxWz8W#kgy*NOwj#lrN)Ptiv;VO`uP4H{Y?7Owa@;nOzi$mdcT4XP5<9-FfCn z5w?mID0{Hc*}*~^DnstWO~JErzEhwr3#wUf;xR}xAq7y%3*kvDf+w+>F9$v;p zoP?ioHmI8BaY*aV;ta08r+E=?;x^ulnsad3^<2J*xA1O0kMDpnzlSg22l+x=DBOyh zaTnoQS|`5+5PS#d{{eiH&-hY<`5dEwHyIKBHo)vM1%ZE&^IBty1A$ZQl(jL%fxs!2 zXHsg6wo=ypjS)zC7ecF!G6o^JT?lQf!4;VwD+rv4bc~#|%8yNs(dv-utci{AkR-iuNxqR=wF4?W62kt)7lf#Epn%X{(G z3eW3#mc=OW*rvx9|_#<|0T2Q5s*c)tVPq8Y|(D72=KI|-^uqemC3>47t_-2 zXiwzi(Z6^PApQYE+FFStn}aITYtk})5EAY|guj3%qU1#oJdJ;Xk`EcR zW0Zs7S8jnEfa;o+7){K5L{4pwQ!%qg?75iDa)c24(4i*{IPCPnBOz|+Wisb;%*!#K zJ{axd0vvwUI(JsAvrm1Fd5oOvSr=9$_dP}|g|c9M^^+A;SXNWtbg>UkYwez1v>h$J zyVDa-`FlRBPDwXgJ~c=%JfnU_&cng%8!4>csXWga`#=W8o+kdWrhtF8^*i zt_l_WQ+q_#B$h5Tc>92mJ9NO+H6Akgm@ zz`Z{J@m>R(z0C*kW6B5M#2rNJ+07$y+y5B;DSkNhGbK^yNM?xT?v30vlah6c!xOAi zoFN?4XE$7nTTg=D9x8|9E%#%*K-+QrOO$*R#kfO389&xu%TJ&rhyS0kAbAi^&HN-v zA^a|N8jw_uCPntvQ9wLA=f=uf1po?30ZE_IYyjSb9tf;+D(G&ZfzbxiPiT&Ux$IzA zf;|)(;iA1(c;&czFuNp9YmtFbGE}A>JdSH*y>r~BNu7qc4z<+-ohG8qns+LT&5t7u z^-Jvk397|zkHJ1q;9n^^>)e-PPicyx0e+kVTK)>u(NQ|CISj-?pXOhqLMN@C5uq^p zf~-hUa>O&KoSy|o?a^N7Zy-xpzw)zmpw=rg`!_VeqU&(1;z*p`LA)5+Zz-5M)q3u$=owXH)C3wu-p?=@MlK_yA>f|a`h(2CG3q{_ zssh*LCpVgAILXkL8@FUHjMaH2^&%W-d9>u=HT21iy;_~6e}pBV9k7RrHR9JH&d1Ro znkoqUwL`x5K|Uljb;1n&b5J%*H+8Skz^z}yPt`^MF;9@D86i3m&r1ya!zf0fqL)6$ z-{R0~i=}>rlu}eOa0{S9(MF$?l*^fCg+&9H3%G4nAj5w-W^Rjx2vybP{DE?UiWMwW 
zck@R$sYUH)s@7B(A+^~>D}#nB0b!$*%8fGWYm`%k(T6Hwi-frvE2^BuR;e&f1D+QK zOAcT0TO11|3@}ixU!09+IcnSPLJhe_nszb5Iy-@nr4s}Y@CLVl+o^y+G=Lus?1%=C z3e_BvQq-rI6_B7GESVgIrzIE4@O6Wu0rxq~+@8e_bx3AJk-h6r^~M02VGN|X#vod3 z45l-TA(SvGD1|Iu2i{+Rc9$B%6eYA%zTKh3b_X}ME1DSCd6Pm54=^HT3HJ=#ZI=L4 zAC2i-iPFQb;H%w0l#R0!rE*w zWHDqcgnkGc9<&lv3p{mRM@ANcY@>4lW~`h3UAPe=O`^W)Zo?A`04ZuUl>r#Ld;W~E z96~>d>#vTOorGRvzIgWpNc)HOTPd6JAs0+_OGV1AWynDcytQfos zWCg7p20v(tI_jX!f@49tA3E)ax)snp&}@mnjz#WAOoR_N-SxS407+iE?nbB1zDcXc z2x1H|40lTkc9dqdy391raT|(qYAq3ho=ZpAu_WUsF&5DNB>1oJ=Pp4vI26FT>As(}km!Az7 zo`Cb>4WbwI?X6XaN)yU zwduf435IO;qP6#BlXrEuQQCj?q=7#~ so%Y_s_$Fg96&XvFcxq3hM7gDCgz!;L7x3dcGK>gH23P|H!P61{4PQ`^>;M1& literal 0 HcmV?d00001 diff --git a/Project/bin/logisticCMF/embedding.class b/Project/bin/logisticCMF/embedding.class new file mode 100644 index 0000000000000000000000000000000000000000..2a99242f94173f7730eef54cb87c6acc7ee96983 GIT binary patch literal 1125 zcmZuv+fEZv6kTVgFi?h4YAI+dH&NT7R0R}m0hNY}f+W<`5T8!lu?!3|WI98PzVaW& zC*MrGAl~8!_!mC=58^)4fsTojoHOU_z0X=}Uw;1i`UAiuiV7r#KEtZ(4M(potS#oW z-HKML>1JI)h#|Vo_j%6XW<9rF+19EKL#U$j21C!P=W-q0$gT0cJVR$mubbR)ZH;02 zUvHs#^>9j#P3ZZ6rD(YoL(9(*M*BW-ST=+7v`ADoH){;B0N4gutX+!iC{zvI)SWp7 zIh`p}@Pbv-6!e~m_LMcl+bCJ_#lwiAPelU#L{;#ghtdx+A*~f!I}F_= zhgWxq*jL%bYqbr{;Erw)a$h=7cfS)NiD}$YaPy>eUZ0v>Q89zt49Y$?Ty6cCKqzJc z7=G`%kjJco0`a#hKq6GkVV)A&b7^vBg5=F{3XXwq38MQZhOq6Yu+o}`;51Q0$#U(g zwy2AKMNX%9Ldd}gRb8e55Q~~XH-%0TI%ua>(B2_hPk)(C-H1>`58WmZq8*5QLv%Ba z*b(9%Fz}8n63)>skU;TC&jPw7kn-QYBCp_^${yi-_8W#ak1%qG(QNX&a)>c`_6-u* zj~Gk6$G|6CiX*$TaUii&^7Zj=!617KgPzhT>F6azF~UDXY;la!e-Z;8^Kg>~S8$cy z5hLl&kqY6xMkfi^$=U=z#zL literal 0 HcmV?d00001 diff --git a/Project/bin/logisticCMF/embeddings.class b/Project/bin/logisticCMF/embeddings.class new file mode 100644 index 0000000000000000000000000000000000000000..2af64ddfcca8882fad9dc415f52cc2ae0b6d4781 GIT binary patch literal 5224 zcmai1d3+pY8Ga^v%`p@b(PQVk=KbFHd7tB{sn_H~$A!}qLnI6sQ2x>TWpLx)1Ntx-Vryq!$7^q3PJ8lrv4(X?sjvzCVTxwD;4SJ~;avoznab-`UbHqcPW 
zOihfL8rHwQk&=7enfy@7QtyYeRw^+R0_PwwAGtuHnKZS1|~aZhRsyJnN3RH9jvaOgX4&4n;nUU{uly? zi@7rmRNzz{t28Vwl=J_67G0MIW9z-j*A$2 z$&5E^pdCa!n6_>pCzp5@d`#`o@rHQ;Y_d|RC^~VmflDNcn3LaYlOtsNMlVS|mX@O% zn{`~O;gpgKM9#ovxLiZ|a3*b=$#iauHAyh)ohbZ1Gz{vvf>VANizOCA0$YUwJq)Ct zX_M`?8Q6|1NuY^51G{Jen+xpn+95I3XJ99;qN!1frdBUt%EjI9vEZ}Iz#B2Z_*h7$ zoy??(P?7RJc$nz~t~Rh6d$`yf8QI~kp9h9p@)F>MxYodRAb0dJGuNxK$66VTVi>Fx zZsU`jzR8?zjT_jDQAUvwKwQQpGcDUGBQ`Bt<~UQG)Nr~=kA2TZMn#-N+O|fmtbtGBQv~jynaW$+_KHN!NtnBgsk#TBHZUbQj}YU2 zSufmJ7sF@pIUS$Xu&6}K9@8E(@OgZJ)F2pA_1z8KauR&ez{7ZiX%RiU%*^LU0_F$5;lv-)+-P_MAgh+n<+~g`Y z_+E6`oe*%k@C^gs6uAvs2l8er7p3TY%fK^umO}?!gJoAU#`2V99<)@6Q;#*4J)_R$ zQxfV(hQILzwjE3i4$3|SykOw_l7#XRb(c`djNk{XEyrYCI%C)EC0HY_Wc*0#`Xvp` zera`m_b0*`^6Fds|A~R0B1{4dCCyv}KWFYd?X2td?V`>oIqx@eogm;|jiQ-!YSOJ; zofvG-W|+B{Ds(j*!S6KG7H#fIQs>k}oeC=_xc)(C^T*de8Qh2>_%m^IL;tIU{_?!g zohxF;{|}um_GR+fVQX_zt~3>;qCQuu1S?t10bWhR{Lgy`dpcWXi13#;lwv=|K8wHD zKZSitz??qsJsge5b&5}&t<$iCE2`LQq)qu#eiGFK$5HbrmOjjG8J6=`wh{AZs1dMN zixtj`+7on3TzYEFan!9@HjR~QPU0+@IC~naOU?SmV^~wb+V|$eCEhi**x!g+bNM21RI^@ezVVb&_+~aii>3a*h<&x?C>xxX)hh z_)+0YXbKMtKO7ph2bi5qf5o%+vR5N z>+fdT!ZqQFn(%1m1RiV)lgg{(!AG#PfQOdg(fubd?ZjE!Yf4fRRE)lcYz68GYXkY$ z$W{|M*o+PQez_8BVe%X00M3WauaTRvo@LjH`^dBV(S`@e&>3{_glWW!{8IQQy0pdo zK3I-!treGOm*8@h+TE-bCe2GNTaPK$N3=GaWJ=3$s&*c}#8!a&->3K=ajZRAwDu&eN#*QBWR`3TJ9VO?oBwD^mCprp zhus=NMdhnlt7)^O<^r$9|3hRne2rD@sN_q;Qj|AQ(H0~Vm2&A*SR2=i$`y`>n+ka9 zD3;a)l|E699DO>5r6%n4R2|h$)j^NZXK%j@cUdqTeI<%dav|hrnLqWSi z%_lr)d18uqh%P7a9D$OeP2k%EFTlT>@f|`Iq@|TgTEYsTILcGVh(A@f{g2zNLIx=smxK;){ZMJkT1M z!j_7*sOPFN#a)e}tK~IOl7{tD^>vkWS#s;N`sAD1k%EK>idcjBy#+UgRh@dSQP3RV7z1d`>AD$fP{#o-{=s~kTgdg&BowK~Cb%%;t_^0Wz)X%f<; zTZiD3@lsHH&Q*CTR#aZy>^jCnKtT{+D5%I#K=6qOf;={v|8tY9$<~3Uzx1B_o$q|F z|Mz|8oIL*CJ@*4RMVzRhK%g`d>j@7e!@-7@l?x+=o`@O=1y+HnoAp6`VMLGiEL_*M z*$5^D9MM#NQ#=;z9S{)B0-DJmj`pCsTEKOmqk2}Z)vaB`V|rJ}M!z}gTC84d`z zHP~RcFW<54`BitgFP0>q1_i2p`TDMwCtthmkR;8};D9PevottSC`V-)wDhQ4!xR)M zI5F#Kj&d90BR@_j=AuGFCFaxqB^hI^6=zj;NMY3)icl<2yfz#)T2uX9Mq-2XS^5(T 
z>X8mT5ti>J(FqF{Nl_(qB-tBgR6J`Zi9H>+r{WoUsnklu>f#s-Ia3&*_D^;4K zVFemYu#@5B&;{X;>DC08^@)T&w3hi5aH0vT6s%;%O){-vHJDIpIB6vGWGq1kDk^i5 zqB%nk6k-ip6s%>8C&XlDHMC+K!y7fW(jj+NLQW0`6r7y}+$6Ur6O5jdXtrxO1sep; zoFoouOL7(j8jlAl11FfbtOkoy*HOQdJ{5U$0gxz7nB(Y>V=P^v+ zM>Y%Vh=O9#dOlPmDg)m#3QTEF>cPGiJ#OZp1Tv^0ghByZGLcI5%7p3G(1Tu5 zGWA)U6L+o*$W@y)^x;B*!h{jgli^sDP#2;fF$Ga-nSg=|G{kWc?d&lK!buf5u}k-n zKnB=+Sm<<4$7zhESU653kkT-SEz}$`NgaA5#R6kA8zpE%8n)qLhGQVy)6YjmqogNr ziH1w@amIg8@~`V=v2iNLl(Yh-Q$IeT;gh(GGUF*4e8waO=d3%v3o?|KYq&y&(n`bz z6kJ8Mxj-Mm(Sa-RDGi@CdArymCE;2PpTTEY{)23`vIGjwqc1i1oQBWi3yeT8W^{LV zWF{;Ar!;M@WKS-T2_w4r?IX(Z-tYx{DpDGSIWdf{Dfp^Dk%^*-N{gQC)o{Ixti{+G z=gO@_+3fc9(NKFb7GEr|Cf@?KEIn|%wC&`lxwESuvfNEM>3&@qm3lU)%!Hu zgZ<0{Jrt^!-GZHYqQDwrkqA?OB^apebl^Tbpy2+z+G!seNE-bbzK;h9UrZ*oCoLcx zTex1@Nrw!*pS|#5foT({r~O=!3P(alLd6f+gf~dT%A{pw>k{Ffa8!?!)s;E%5FV94 z{Yci;B$4Wnemt(>$9Tf5&28zDZI%mvs^Mq&xpXfcH==A@Gb)%D$FYJzeh5!$_$7YD zW|xemO+31{X8>~IY5YdPud`cBzPlQJEA3TGq#{ujzoQLZ3%0}(Atw|L-#fv;79Rb&j+)MbAf|uz*Zes}Y zV6%bwtl`fxZ>G|Q7Nb9w7-}?VQyeBcSnV6^)i0N~W`IMPQP6kgEJg-hE%Vki7l{L=#4zrwC6RNP& zZ+(!bh#qbqO+w{vBpk5TwYhawS;M;4j;6MzRZWf6WhQZr6YG5PtNuVS%VkR+(+QYH{)YO2HNKd=6!`yjW zzrJ|O{Z8PNATs~KD zAsLNq<8ww$JJQK)+0sod)-Af!;mtp(4#m@_Re=n0`PscBuYa58(Pa$4+MH}+XIyej zH3xDSj+9C+TV^|67Lekc?XgrMXsis&)J@+pjKs+@(=mrBV&Tc0XMK4E;7B#c^2x)# z`Oc%h`Q$<0eDauYK6zH>(@w6bD4`q|-$JnRKTusF-q?*&H$1yB%?!mv0AOv%CD()LQ%Qtnv2GkQk|QkJyhhSqG?p*Lpl7WGA%6{i}7Kq5VDjr zYM-N#PSTbxsG6~04^Hz~J+?8_)!M7}VA&WN>l`ES`yKmmmf*&*zD}KADWI!~F(hkMpE`)t1a|7Cd~E~_DRbfrsZVHw~R}Dj;&d9b1Rr!yzTO@9#uGpNSud-jQ%2AMm&d!^F277Q9FZMZ7rTh00*%|l<>wmi?;-s z?{Q+t&}Y%-D1$yn5=)}bk;IpE%#q9k8;U3+WMV#_dcVk#N{y%EOXlc4TJUAFa-XKg zouoOadlA0EQK6X|UMPiUS`J~gX?+K;A9dvSAJ>3-B~s@{uR>fBX+x9iq%jMRC?ar=n4(_cD{SwA0~sxMITCq$g(|ERb479*%0_WrNWjepC7#M1)7M#^?1pcV3S|pULtSdu9}3!W5u4W_>uoza za23wQwXB+*?3y>SI`*+c-pk(j0PE*bR?q=v{40FE#fo_shH#@>OhXSVuvY{S7ERbJ zR-;dBLqu%n_52FN#5FtxUB_GbFcRW=q{QtQ6nFDly&qe}0~ityW1D!4*S#ljiFg*5 ziWjh5yoAfeYq&zZ!QWKg!Bqt%xZ13bL-dL@kgf0nx6s5CSwE!7YG#d*R>aC=g_D-) 
z?;NH3yPPfNEO&2?rt-;Ml%o>TSaBS=NOPd1z#>XX;fBX8)6J0vzc?s7oGro(@wV`C zRE*i;P5N)=zELh-7d}cU!94M*@RR1k$>L>s$>PR&hIm$-K>OTSA)XY|(aUXPJ5(`) zvtHR%GNS{OBzu-u=-44b>>|=aY_}H<)?r2TR6=jrbN)vOk(n!llGe>ggx*Ags z6K=P;bL}xVE+Ib_i}_H{M4G~%BCys{MW;nNCxQawGXBdeH=t?@A?f=SufhAs#D za0~U}&dlgSPNBO|yiHZr0L;YDO;}PhWV&1UGBHyQZpCdxHYiq^R=B>iK^YcqSJ=Kn z+TrrVgl3$FM9{v4&rpIZHc@tU-^Lxd$wn{klKKyl^jf~8z3M?}#F4>gE!<7WHwwzZ zJ@}lYrb~f;xII^_HA>>zt9W8)C|EF4}@RlV6kKaMbpWa z`&t7`FKFMv z0~ls^=z=uvVb5j5s?j(e#6uR?Hrv;~v({1$zK9WeG9xu0Cp_%nOA;um-TcgyD_dW2 zFeY2^Vj(|!NOo<8bsdZ&%h8f6vW{eDUC&CsnbCpK(XM`*5wwE|OtR>bH#yB)XMb0J z9E0#2oRP?pz%K_=vXgf(EmX0Nzzb%_yj)30DLR z6NXie+fng-+QBn2XCv8svBc>bo1mmmxYJxFHYLRg8l5Q0gkl+0l_u&yl~$vZ;@fR? zhBZ2AZk96+<#N6*5s%RLIq_a;srq)KcQ{L?B%Af}FzoT5e%2(68HPie%^lLc3N6Ed zn|!UBOX> zk1&{yb|<5kaiV)3r!HgkGQM2FR}aS8VvEQfw3E?!OuYsxJcn@80Y@GmX{w;`5!%x& z<|$lz+LDY&V#$_TRz4F+ZjrQP)n-=^m{qG&!MVhv)if)3Trzn=;q~QWGE%_>!KcaK z(_@f{VK4Exi#tPn)j=U+$|??-7TLU@h+=xaV0o^jEnd- zF_qPK@Lis<7mIkF)-a>Li|^qDQZ~f#^di1bq%}N_=pP`)^TzQKS2j-_=Exo485eN` zFZ10NW^pH8;c5$h$k<~DFY`kev(Rq-G8P^*i(XQA3|QtbQhT$}!XV8}6wkeC8K3ywGFoaR4N*kmHC{v`A`S9XlSZT_jVPt@#%@F8+gI=tg>-BVrDUw8 zk@mEej2d#Sb>uRX&=;P^JHp{FLR8bGbiC<+J%^iX{_%PIy4AjI5x*Cq{$Z8$r)|}0 zC&l{VJInt_Mnw;g%Xvl#7n9LCCG;9rTgljVaakwvJi2#Jp}?rk1ZAf^gmJh%_iX?oFnICcIM8#cjkWIy?6NM{_Q&eHsV(W zB?2qcsf^uYrD8@VVUP#L1Ojt4w&z8T-#**oagl@3R@#+qHRCdFKVZk#iw473%1o+7WQ?OvMcqE>vhJN2P$PG0?A| zN}zI104jW_D2Kpe4W%ekIa{j14^`k1%h2P~APFq&G?PYGmatj-^uYw<^>wCVdZJ&q zOt~%y-S#mvBT(1*5Y#0GL#agEAhf=@0SkDB%!JXU5Az_ghrbR5YXp`|^OVQ4Eo(|A z9@Nt$n=!0l+_Y3QqDetu7O#CH8QU1v(2N!V?@>eMMB1?I5rO)7NGPy5lR6batWyx0 z?be}H!+LBGP)_NItkFI6fXjSJMdo$68k;0hn+2-pyG6eTTbaY5U@$bz#*rS&h?_B) z3P+;4t%p>s#^V~okg)3Mw2_Pptewx_9HcVmQq=u)kgV5TkJl+C} z8s~{a!A@pTF(KQsLqi6y*vlL;ECuc4xVX2AwC3e1qS&OO4Z9@(9ZcxLg_+WMjYxpu zll+o!SYp3dLl?RQN;Bz%$qH?lFTjIyB8*7eXEpT7K-{EjSmymc4bOp9<&CA1wr(af zdkkj6EDnma9gzCZYZ$-*mQy;*qqogGx}Z1X;G*SRQ8`odkcPu}fv!^0Rdrs8LHDph z*o4{8he^ka0xP0fMjF#?qc-2Qg0=0vz1_XFbY&~{@E#2v23eK|MIk-*NoCNM|4e#J0J)AH` 
z`XqLh#aeKl1&ShGyoV1IyicFg?UoxFKEy|?Bhxl4-FZ+J&$8_($m!q`K2`8Zv1A<2 z#a#`TafM+fjnh1Rb|n^g^5AnuRvWIBS^uT%OIIZc(Rqs2gRco`3pEGvt%hqdT}qv> zrR2Iyph|h2A2j@kpO{L=^h}p?GT#)ILWz;&tJIguS}{YGHP3nQaScikP{)VG#V46y z%XfyO!c_@8l=2OQ%DI;kH&+@S;p_zGZhn_EjbUNa1bhQySacKBP0cMeIV}0zF<-{f z=cuV1y;w-&#aMtxDR0V~V>xQLq9d%pO1hV`TFy$GSshk6SKyQP;1$H*ysEs^!~|9k z_#PWa!#LKCVWV$L4%?df^KIwQ!e0)pVfO^u1||?4@Vm#bYYb0^OGjb(%Y4t|&>2?z zifcm-Jz-DNAE@?wa_GB?s{Fj>PbG)_`$yrufrGvyIp}`HEtP#k(zM*QMIA+L(eBt? zB&t<^d9^B4#*ys3pb9CtbA9qZe`yZ(K)CF$$%}IOl7z_x2~#V<0V+_BN^CL}v-jn*`_@zrRuMCLfmDSmQu?hVT-ODM%-#Af1?kbb?$T;DM<$l2;6-F&os)p(@YXH7 zE3kJIX3H&nEN~aWPSPMLZ!XaNS)O6?;%Xg5b75DSGYKReHS5Rmg-ndEikjbCaEY23 z8sz3*+U4*aPx)Tp>g45VYF061pn)_s@_*I<0c#=@0i@Bw{@6euf)4z(gq8O$z`qQK w@H62O*i2w0NG=DYb#PCTXBS~08kc}cx^Ndkxx@l!m_+rAed>+UZwY?+AMByC5dZ)H literal 0 HcmV?d00001 diff --git a/Project/bin/postProcessing/Similarity.class b/Project/bin/postProcessing/Similarity.class new file mode 100644 index 0000000000000000000000000000000000000000..58ce3b56b496d403b77234c02320cee6e98c43a4 GIT binary patch literal 3794 zcmcIm`+E~r8h%fjWG3k#P)gkaL|~Vi-Yw#a37}}}(xs$mpa}J{lVoV8q)D7i3RUrf z*L8QjE$gnhuDYz&yW%CK@;pBN=(GEy|IgO<%%sVr#E1RCeV{Yv%=ew|^1k2q<^21< z-+T+;CM+lj2fGG?h{>RyD)-foL1)S; z32Yqw$p8{`aUhu#I2>E4G@N%B;V^wMXwb1iKr>yy#z|no7Fg$tqiQ!H4R6wL9X6BVljdB?B-d+We%Saa(TXkD zreLeUs(Ne`X!s?5C7@c4X&bJSj)t=%)$cUWiI{>LSQd2y%Tf(p=w?mj&C~SWzHGr) zf5fG?p)5UQW%T_T25=*bW>SXU8S}yK)o!AiOryCS0)dI4D0br43T`GiDiqUWj zb_poig6x$nSFwqqiJ>kYi(|I}R+Fz&h6-iYxrRMpmngGFu56B-5Lh1@@(18)5s%w( zhk|_#fRbnpcVdXi6w6FzSCeG^r}QM+cWL+y_A`%u_kJ=7X@ziX*1|1dcNqZ%*u1C zQjU>1$rIJ7w~9#DN31~VXP7(w7WDvq`RBL#lNr0Eay0;>5J+pd2CHScZ;HUgqyqMY zuWM3sCC8l6Fa?Vw7i8J5_dvA@1BW<19k#k@%y0sn6u5R}vD{I%a#_=kq6ajx2l=ay zfa*|=Vj66>5FU~}>f^C0?;2Mj>uyVYi&QOZn8hhJjZrL`dCr2qW@oN2Q;m;vb{caU z9>yb_mQKO*v2xbcB0Gb}6+FhFiS5Y1-jIPkq~cAEi~$vI zv4sqWKc4BbW0v~Tr=tAndqPk~qy;IE8sMDq79f_P84MA5d~9Ddqcv&O+9dC=z8bJ zx?hI0?2*aRcr;Zg+ZppNOJ15**T+P^Bp+KDO$%>sK#(h5wfI!H8{k{;9qL|ydXb9& 
zqI^oRe75qbc?x{5;!emuq}Cd)16V7Ki0^4sjVOdpy|l71mdT z#BD7&Dz%3N&M*B(59%RZz3vi5suKEkC1>>D=L|FGrr7T$7eR!(POf({g*{C1I1`*= zijQ*rEH4JHQT{g8yV@t*lNYK`Fku`-(uL;&#<&Vd5O4^0%Ns4u;~u2wtrJBY#u1{v z6KRZN2SM9|V>n8wN`KpMFQpNBZRgh=^mLqdI%v(aF+m%i{(aQHpR%I}E@1-{1wugw z|MPV_t#$%Syqvc-d{@vziGsnJPTvwQcFhhm71nEw-QT(F&eFX~r{}tRde^j9U?lq% zkrD974b8*WLp7p;Um$xK)5rASCFB-RXtOJDDmY!iqkVdC5l;qK17|CEIbB z%4?>q=)J6L=2N5b9A2o>cn*JXX^imwN7Bb1774R?9wB>BKYNsTgl%79u{AkF>o4JDa`6gn%cgzRWn-BBB^wvp&W*Occ94j) z{jGxYm(g*oM*`nlh41f#uk9ZmhHEx2;O%5{l?y(Vc(A$7yMCQebDep=PWUne2f+t% zgVxKjt`*-sJ8*_QB`8H%q$i@e;XE-+VKF{{H$6-~jIwL>LMa+x0#>_DFYK!x}ffc!pWp zQxIdwUhuEHVRF)b8eZrlPeFnqFSS#8)@a+86Yg-&b{GnaqiF=V=irT3V}rpWKc#jDf!pS-9;%U zd{R2LJZIJnRcalEWZxKD+?zV&I}7za^nE4bzUNRof@o6n-kwsO%~oiJJGyF}bri6r zpvbTpQoV}%c)+kW*1Z8Yr&MXatSwdL^UzfAaD^Vfv=p}SSj7`OC9px8PHmz%0K1Gr zG$dm0kukJi!nn^+T%~9>nbY(?Jz#jfTu=b%Hb||L0=-p{#RfyFZ%>_({=pEVlN3|B zEBeQv-}YRBv>4HVAvw4=({e}JS~Pf>V{2sEVnlVkjfv8U(;`Rd(1y`1 zDc72j1jH*pkorzS1S)+~^anJ=$e1Qc^&jCLGIXO^!Uny_ksQIM3^XB0gf7;8BF}J* z?dn{(!lQ3wiOR?c8L2L#?)V--RPv=lpGkWW&q<2lg@{Oh-&VS@l@Q4uQ57#qBVg1i zlE|K}RIgE}(YeAak<^FdC-tF-U@?@XO$#7YP?g#mcCklNj{FMP$J<0wrXL>wzf3XsRn zF#Qjnc%*%4LYZNB?qBo+I@2F$={k~P%dKHLZ6?yW?aSJ0@2%rs{`~nRfRpevC<43I zY}dQu*mcu&t=96yb84rw!rop|1rF&-G6X=_=T9#K9h~@H&0_wEgFjI&j zsUwa)fppbsne*$bOQv(pSZbOALsh$OG#3rWlIw6&_3m1(K&INmb%|zOYk8GJZYDpI z!cOecFeEV0fiq`#cXjMWnn27)YXagCpBbvOY=@fh-0e_xM8`fUrn>hX;*UKr>jKJc ziVBNQMPNtGGwLgI##)F?YuJ*FhJZ#Z(hjw$Rt$y{G(rjLt_URSHYqb_EH?##ypDr7 zBxmC~_Fyk1c}~)nP4Dvjyuetl>c_B7h9~H;RLMs+&O45=QMFu;3?ysTa?9}69WrSA zO=~L9@$*vi9Hy)k*_pA|$#bdv2HQQ9%OwqrYF7tZ8%YX9B>Qn1A8ME+RUHwc*YOcP z7D$*a&)KM%q-19<8V|Qh(m+Yp#*{!d8ppKVY_g&(yH(0BYG6fWdep+IpwOFtcnPSzAFQ$qJ_o@F9u?W9s7=E}gLnlf2ru*u+wDWVn@(*%^67{y-S?=kK)?szE>31CqSuDjL%U2^)rc&ChEm&I6wuy7wU=XQZZ2X@hWG#M zX;|*+3<4C`_t3FCvu4T~zre3yqro?*W@=h?)M_U$1HZA^k%wCttlQ7HidslSEToXZuy27^LTKSTT2Q%n za9}o8eu}}d!f28IEA`pOF)>o7F-oYqW3itQWEnAxJce#pD0!Tpl16oYz42XAC2 
zS*ceZt0JRHNWbMeXoh|H4E+q#<{n3|Yu{NegNYThZr7*sQTqRf7f}9mr>|m!u`jGE&)c25}-QA3>!^ub+$w(W? zUL5t2BykMKsW;(s_JK^V$dD_^^}-7jZ*Jn`T;VxR3Czpou}z#gqh{3?ILo6?ws0;s z8GnlXN3&`s{u~z-yh8Okc-Oze=Re`vjmL43iSK*z`q!gB(4^|8@(?Kl2}m&9G;$;( zPhyG~#|d6!Ma=QSmGH{}419r`ToDj%F*x41ytZ!ROTtd$9KOQW1f0brzQMQjkfSY` z@HhWbB3Vnn3zAoE9328t!PgB)}h0N!*3irs|&G30G-T zwwI7C6x3=Yui5mV-7G=mbOu`WM=+3{EuohJ9?&)1JqcP%&~j2PcmDESXzZd zD5NIc(Z+4f^G)DRhM3^fzMzVrq^ToIo4I0Ct%hVvM{}}85O%r*bstl>vE#x_Cb73J zl^t4XRl91+nhi_W)-2hwcHyGB+9~RS&M6mX(`{3h@oRmewIeyje6QWxPB#QLEu}7J zrP@-ta|A_Gr>z$RYtk*rD2g!F#t9g$o=mkR>pONeB{ORiO|6_(T9eF?O_lE8CP=i_v=k>EOC)!wy^NER_$-T6=V3cPc13--+3&e~s`ki>Suv~CrK_#bkQAet^ zC7Fq030Bys$4YKyw}NoxHX>!Hjr(@x5RA zOog_q{XS}wuEQhHXsjxaV7ow~5u}Kowdq`e#j|nMt-ZBohRKh0DFTWMnK(wI-eR2(F;XJF>~ljFuE3 zGaXmixEj}xM`=msqxNJb$MyPI^tk?y7^{qS zyA2y*eY(@eaOdf68zT|6a1U|i*~r<5vt$%sP^G?v`z_o@qSP^+j{3^tHolCn2<+xe zlH}=35lkDbxB2BC3#U!H=Jb{f7Jk3QXd7hc1r17kc=kPr7n7pJT(aP*HBw2E5 zv5Gio;}Cji8REs4B14c&h3S)g?L&L&G3r7ZT$7JXT zzUCx<1FORq-m&o`Z7gX{x8)M4w(RocUftBc+W0pmun2Rev9AW;Pi?$wNLOK}6-NX= z3irukj^-aI!nj}8ZKBtQ=r_A%zm|Vy#Gn;iLSqsj+J$LE3sF;b6KsZz z*Y3z9vpZ-2va(6hL4shtGR*rnMj%S*IWt5sT=!bY9}$Ei6pHZ5Mayby)-qltU`vq% zSp*Pz$}P?Y1(klhxH>uL@Wrlc4UPNSC`2q%l2JosqzxIZB`ql% zkdd7Bh;ZG0DWrv|q&gxea*vuqxX6rLXD%wG35R^$3pjpU|$pYQQ~Q`C7dC zfW?%Y#<+?`i-3o_B2q!i)Td{Zr?-`Rwsmzb(eT?dRQS1jW|U-e47AuFPkXEBMrYb6 zm01j#sna&Gj_H*-w#-#_u+o{9R9m7oA}p6>izb;&k^o@koZL+&VMAMD7aIauhT{HD zXblf#$34w``Fc$?okq&f6`)>bu0T(1}P0mJz%a)YZVnyb~CVKlYu39cydJQXrcM#bOvdI z19l~LCTmhTt>q%Jik1Ap5-B1LR5zO==2%wBT9%cvZa^9BM!r8^I5AxC>}=Vfb>?5_ zFpZk57Q)EsZ5jj?Ex=@}mxPSf^L1uKcoGUNNw&78 zBSOLr)RQv^Nd?w+8-GKCKBa8zcAi)|e@e+Dn8<9W8f8PQMtM9$<%p}G>P=piO9)Yv zDK@jVd|=j*WRY@)BzlFtgpnF`aGtvX-+DFX#YI?h#`}8X6SU5R7>01l9`&;L^iA2E;b~jcL>b=gcfg* zEPi;d*0Ry%wG9-9nUH`|2}GDRK38~VQk~Y1i#1d zt(t#@e?t`qapE(42q4C9ou~rF@!NI=U_2)9O%wwB*2$M~v?_l}#q*fhgL1)&E{v+` z!Ku8Nc@|Zb-I&FJ5iC)Sdad3;NtHd=8o&|M6lTYRT{tZkG~40)dUyVGcYaHLzK$`Qe49f`w^5?E>ucRP zr8~HyPq&K&-DoqbUs5p4sdPCAi3ym5xtNU2G))KpUO}TAVEZlH_;Z+Qj8$$d|GaCg 
z3D}7&ax~6nnz(~cMaFEi5$vPi3ufzMbBIQ?utd#hOgXua<}D1N7Z~sS-a}=A6xpfH zF1nk8htvN@da#^v@!5m;+=_}`+}L;!H@|?1 zeEd8gZ+!u`6&~*>Jl<6_FW4mud+~+F=?8J|A$&0&iiLXcfFK@@g-g47@OSE?CwlM{ z6YJl16JTT#Xa~sQ?1p5 z*Mz5_f9k?hT6`GiPmB%g z!S@3FWVuLtGd8Rn|DucBrh|SMz`Ypbyan?rbeZ7d0B@K9>FeG2ad2yXkm_LY*Rek+ zRo(bWWBsciyyQXe-wD0Z!6D$bo1qL;k^`!EQ#Kv7n1NM%+k{!Th_`SF%t0H0xQB`8 z7JQmV#53?B&ctgR{VmMLuW^fElfzhH;Cr)!B^SO#6@G5uTP?TZ7x*R7mymPtD+RJV!cpc47fBQTgX;#-CNuG$ z_zi=8k4(jH@jFu1i#!7T9{)w0zJ|B(-}nPr^jmlXf5e|qOf5U{XSPMD>m~Rfw%OEp z2mY693^T#<0>U5Y5quwmL;AQ~+Y1!aV&Opxk6Acu;R%c9P7^zQw9@bo8)gQd#_<#i z^f8J*megWcLr7HtfkOQ5V{j^C{~!LsCuQUp8TU%As|r!~o+76}YV}>Hn(k5B3hg6T zx7q-MAo>}7-ZMZ;L!UT1-^X8JDOD(am z_B(VIGvg0RxC^n0Av;vI@x*;@c=w%N)h*G2Tb9wJz(y0jaa_o}c@Zx)x6nBWUUxR( zS~N3;Tks^3ysX)VZ(=)sM)dy@7t`!5V&Os=iL{)6cH_p)j?q0g%4jb>8J$jip3SQW zCq9QU&YbvMhL=rL2D$F@CMrW*b&m`)@%bcH$#6ozG7z{Np}*m5C))a`U9lhUUaURl zMOp1}`rSOnxEOt$5uSu%lj6+yeKb`-MhtkwDro!#$G8Mm=nw71UD&` z$rv{)%GiSvt1p@t>cS(MLJ!J>cv$fn3%WVaQe-GVnWB6Nt2kDy=_NW{XO+pPNb9kv z$uydDS#_9dMo!|SA;CMxbQtiKRK!X|V&QI?WTf#4jZPXblT-7O_J^Eo5)U3j$>>m- zOnsnlrJGjDWSZucP(H6H9;bIpWdV~7tnfg8SF<45%CK(6ULtQF(zq0tvv|CY-*@0M z_#z9OUR*`azM3~%*O1|_l@LBF5#EoF!}T(YH_P*Iqr`EO!RCJQHC>d)=6nyEyy|eU zxfSn7l}slYZsTpOVlzm0S2<7A;|C^-gs90qGDE6KX_w;$gItSRU2TvXp>8>oMT)86 zE|W!~)bCV-<`RS9omyUP)5~ZE5DTh(mW<>BS-B7Qqhw&Sg=HSFO$IS+-fyxJx>jTT z8XbvbN?vI=JRf|PiI1!pc>9o?&dOC8N@mY^jOIq*Reot+(1S!fKb8Ds**=0Fl0=|Q zKb1%Dm?^Fb9A4T^N1w!)jH=%g|W!uyD7c3BFUo`>$DD9;H4t32xpmmkNk0{o^lP3 zF0AvDZj1$P9I8z zk5>MfF7$TcP}MO!PshwUjxC0CL>I2=x8`vMZKmuCK1%jL&`nEZ+EgXy^D~(lN@q4BlaF#$;iBpzw}&>`*1UV*LfXZVOD#9Jn*25$HUUV=goLT zw%}3Lm5<5e{CVZOctYOAQw)d412H@on1Gi9C*g2lGL8gJ!`E1Fd_B;KR|A*g^}rSQ zM&KH}5qK8g4!lKv(&O+pp0XKu9{U8|mNQsYGT`{6*^OsdKAp)Y9*y~={1T-0=eqfD z6K_!B>529%z1n zV>I+T%#D9w$^K`auHMIQWh{OtRrtLu;&UB)GzONa6J>=L1F}MDoEYHKc}@(lSMRfm zxb7^2@*t(HW+M6>LX=q{wHgDmn9|GzlNlf3cs(90gnjy7F^7PC4}nVa-@q`ap$cvg z%vC_@QNoo-*LQLyXDR*;e~f}{3CW&i(LqzN=}GCX!YN9xhSI*PA#<_h zT?}~_OLCG<;u&hIURu1_Ez9EJE=*NNx;Zvv=EXu@ep&7;+07!3gG`=%OD~l8?p-n6 
z#5&7YV^t^{@iaS==h&4z2yWw1@BrQ;5B`Pbc^^-+{Y6&zuiyjYxjFO-x9@q5PtLh} zCT=zrQ-D$u^o6q50b)pxY$4;5TCH`?%O5^@T5u1*^YTYkQ~czwm8SIZ_cZUW$s10; z)DQMc;2{d*m8HavQ+JoiN}5J*m((N9#S$-z+#^*y)P+m@M?5{8nF^YvbeXJni~E>ld8ahYDT%!}@iijEi^%@4`nvS?WxG-dIAy{Gc79pfye z=K7=9t(jwC_o_@0;#5~$r(5{SYsgFT5&SC0=jG{Dp{ySe(>yg-@)SBliukXgf{3$T zK3~E-Uq!HmB)dbR$VmyVW@fpa_3B;BOpnk7k8*5}l;L|k=lzvG+l6I}jFl4&GMuV9 z<{?93ashqMbr`WuRUPMTkaNy^ir0Yrj+)Sp4;(H8;2DTOWz>zl0#om5|`*`W(tBP4g>|YSBWS z?KFEu_EBa_t*1}%%~X~kKlC~OhJ-KV3-@}*ZH^2S=Ky^s?~pE(iB2G`^bJG4aEjE(mF+mRSlIJv%{Q<&gE(z@buMZdTTJQi1o|XK`_5d#m zHWRW3uoWq^@~`s~f&cdhtpPMC%)*EzmVB;{zhVgg>em6(<-eBe(||9sjaCZ*xj}B^ PU;TNcm*wA03u6Bd;$D%T literal 0 HcmV?d00001 diff --git a/Project/bin/yelpDataProcessing/ProcessYelpJson.class b/Project/bin/yelpDataProcessing/ProcessYelpJson.class new file mode 100644 index 0000000000000000000000000000000000000000..18cb83e949bcb54ae4bc8c770e721adf786cb48a GIT binary patch literal 7695 zcmbtZ3wT^*nSTGdoij5hZJIQNfzqa-lq78ukT%!^*wPkkN*iheDG_Wr$(*KBrZZtK zy?_g%1y_-(CUR?L$qB6eUf6kfAWYR7B z`1DD0&iOCj_kZvEz2A3Eo`3Dx;{YyFJq7{_EeD+R*m}EQZ^>mxoP0i&8SU~ehd8n^ zpUoHuDb(+__u5@)o5NQR?{-EC3X$PrzAu?qn7c_277M9#*ML*#RfrCxMl*Jym~#}) ztscF^z12Gn6mqoJdl^?mE=grlh07Fzt!;w}p>^4$6GISn7NUq5n5!^v+R6qyzl)Y) zn1i|~6dEjq5fM*P8K=LvXV}SIV-Kfky{4?~4rDf+#1)9DXS*UlWt zId;KWm)$d#b_&i`XK%{cCxKCDYOOtuxF5S>|A;eINM$p517|BVYS&ZQuD+|wBL>b< zSl|z=E$-auapKXzp*O{?znB?@Qg=M@25TTG?7$i%q{W-;&NI+Lkf z4Cmo|1IwzQ8#s_JID0I#qD>(_>J$j|n3F3UP*^tGQEE>Y!*X;OSW&&L#?eA2x)hAP zcDm?Xy>rHvE^B(*X4|qE7YfX0nxrM>b#-=j2_y4Pp=&q8+!aGJR$1uA#pH=SHs)lK z3LUS@UoSMhvc+l(Js_TDA?p%PVSekZ{w%_UF)YMo;?y4yJO1_8_|9cs=3|7CIuCfx&0Qd z#+eG?ed2<6|7HtY^>NAGq~t}A$#AR8z1G4YVw^jY&gS*pYolb#kcGG4dS+m@$n0)% zf#R9BFi8uHm}BqhHSt!m+f9Ldb5FCxb%#K6qr$@0zS?L`12BxFfsu+_sMyznBhDLR zVk*tViu;*K5=auyq^4Ck1eWiP7}NRRKEZQP3>B__?7ND#8c3~eX|Ixxa5>jnz; z$W5E=F;6Vcs!sp4MQS94QUt&RRix$AfKnj!+#vASxQV@tYJaw~IlHsj-^Ix{Gm6Y< z*9c7)9DmG}nDb}hd2Kk_22I>T-R<&C8h9J&?JJ1OcbB{|+$!|C9d{UbJ0(-*>~|AQ zELwO6-l<@@4!KJd+GS1*yXFpvYGo_hPk)`{`(l 
zAJP`lz6UHkh=-^{&do(TP57%yfVXekTTFbE)XEE-1|F{U*##tykKq#*9+5PS(sB2x ziATAhmME!Avx!gfh?+5xommlmdLogOi!)Av2&ZJdl3g>!bC~q!Q5MYHK!-w7DbJYJ ztaZKRaCj_jk1&S?^*a8k!NdJ~>NH|I#V+RTE|+#~+{!oq*xm)t4O30%t*7A5#6|z)VMHtUyw#XJ%vsb&jKBi0+GgP#y7hA7qu@_Tu5vTQ) zCD|S8HB}uQWB%pn2rIan%L=aQXY^FQmemv+zoeV0LA04vG}U}UAy;y+1r&@}%v5Ad zgAb)t-GY;`(<3@o@t?U52Q!s$!cA!A)LVHLa$;RtzoRwBh zE%mz&-87dwa8oTKg3@X15JbrgQ?+u-{y|}VovD^sF^^2>t5IWSrc(w4Kj8+&5ZQ zHY}lY>?rs@xEo_R<#SoU_MmEDcaXwOfPr3tbILPPsVFe3+2Mcy5p2meb zR9hKx2EhxJT>bh^4RNg9yBfeS_8>zL2eAWLUPUm* z9s3X}ae-k#8CY&$p@B8LPz2K|;hgFLk`XAOzWS**ozC1m3x0W`7onkte}DXPGw~b8 zqSp&n;gQlw0m}!fC_sg00jiyx239d}-3;l)7{F?VtcL;X<@r{#W4#o2;WC1=Mgy}! zv-9CHFb|i3DU^Zf_kbBB_g!H2V_6xPt2|%?jw=u>(OhIE91`}ksvk4n8jD%xa){b7 zGnE1t(bYPpRQz;MG#v0FE&0@cs(g~nBl%ZOPnX2%2{EmL-Nrcf9>c9eiSQBp;ZfYy zV$oH44@BlX9FgD|J=*I-#f4@q0d=GENOPu#JT#HvQsAAZrIdMpmUB!J)@anKm zR(bYOUIl37WzCKtZGBd=BgFf>=0q4jrVt2E!c5vvYj#BF&taXm20d~xj>jnox8nwm ze}+1In2ddbJH_Z>1-#JzF3>pfxAg1 zIpeDTAC*oi%}@G-tCHrYHGx7NfsBOV5{OKioj?aU@Fkx>lXylk29ph6)~wl8F?L)y zIEfQvReT)lg<)S;!bYETC}Dho=$)j}|A=&X1vjYx(khHmH5ax@ zAg9jZ2ZEI-R1k1gnSiT20(Pm0rd@#6WFM&(+wr6;MQQ=-lPgcx;Rl{5U61d3)gp~Y zHSG*~_K>EXNx!z?FYuQn;0t8}o+JVPiUhQ1;XJ%Z&+9Y+uaio%eb&-Z;xR3snB@v& z;7$Yg74x`jNsh^8Q<#tb##8WxxZGcy? 
zk52LRawb>QT{IKr^5Ye)>uDKNRU|i$;|77h!$Uum2)SYvPK0L{s}2rKqaR5`1bwY> zQteEQJ}c%w2e3`6O8->p5x;x+S0XDDCh_m0aW&m$w*bOV8gYD(A-j()%l#~4A7(T2 z0K@wrVSk8W{RrN}a&$k(KhA$&!6!7BJ9Pte+y|5C=N7LX!t8dMm$G3LxPn~W;$3ZI zc@(%pw6RXZCFEMD;fl~!lZMNnMN7kFYPjYg@LOD}yUyjW;yPl$_nVpM1ley&53LUU zUy!s*BT~*vN#aeX>RbMcQ}y)!@pE31cyq8jG_Hb`AQC$lM50rH6+sfLbVDOpMOWzF z|9=TeA}Vf7zcX;%uge);)6ux=ek<#&nsue}Iu@Z5O7+Z1%$Za{#ZOd5&s@EP+E6(^+lDz#;(f9kbYc_GB} zJLX1p23It(sj6XHjyQwWp$m^u;14rh$C>7jvt9l)Iq(EO!hRO-V^i_~6ZR>#6DL@4 zrg-%NzCSy1tS;e~vMX?0*{sq@jH^BPn!1@^#EPuc_pls@ z>^v|3t3#Tf;GctbXnrbeR~KlB2_Q)g(o9vm`L5-1)>a)_c0%;;mzuM})K@fTBlPuU z%~^vUzon(d^9@WvD2#>2=%IyCEI?fLA7{Y8R0%;QXLgnbdrfO|sy9p6V;%W)D%+j1 zGBVe~DEx@iKzkrV#}{?{*T545Bh}X@V72FHxbj?(9!tYmqA+#LUG6uq z3=IlX()pa7%H)dYo4Y5daH!N$w>64Wusn=YJx?_^TqYRg>FMOcj#Rc}ZrG+Ur?$PW z!VZrxVn&mJGjSHx^PH>v96NmNxD;p!4U+6Ko7Pm1dD}ihR`jpgw7+)#m9mm z8sB8#5?m?-wK9VWvui6ccF#s|IoTj-d|9SX4)q#H3(-R6m8Dd+7y%Iv^_GfUj-X$Z znY-)=GBi6W`?nj&N`OLHGuLkqM43c61B+1`2032AVW!y>pQJN{ds4f|(;S0Cf`R%48!hTj6rYSRq znPaoE#azyn8}DnJrEaPE_sGf5 z7`Pvwl_F!CR?5y3?m6SGXm{nj=O7*m<3TU&wE^C$fzM%>_ve@)6VuHtX>xwVz+pU0 zehj3Foq8)9-PQ;`PlGq+ZE>EYSSuLpUMBq_x!TNTg%=SV<@;hiU2q+f)uOoM?AFu? znK=HF z8~QR(naYLv3hAmzu^U(P zd{>VB)xcMAn~eT$;A6Or(4?$>0vPSem#nlY-IVXJu{ziwMIOiTfS<4J0N-_d<8c<| z%*XmsB`>}5Jjxm0d*kMO*_G$hxE{fD%%Ef@HC#v?@~7?yVm*g2`$5btj}qlk%@Hi% z>Y@jce1MyMIG&#|8UsEXqf|c|)3A_gPtdENa=uO7D;oK&C=b>j!ii(Y#YXV395I{? 
zI0+|nRwZqe60z@QdMy^V9>m=G!>IFNKW56mn9l879=4VNb(J+hEJPTI;B zZa&0?qXp&_LyY%oZp={U;(d5ODX>t@!v`qysaD#gM~JDl_#kESJ82a@#8CkAsO=V7 z7eozrewd>W>UiU=WOSHTrSTDtBDAYb^RG&?ZVw`(bTS-9cNpvRCkNe%x$_#r?9)s)Tgq+g(PepxEa3+T$x zBnd82W<>}xb|*19D_I6EW(l|+?Kr^4?p^%8pR&W~WE3~x1#HyJIh`kn7`~{hkB@A7 zgc{gK@hrZ?k&jw7a2gn(zK0$niXPmkyC!?H66 zj*^vXl*Nsl;T~6}cTO8+Oe>Fa%4Ren#7&%w!NrOYbqwK>B_6cv>z9rNh{$&iVG%>- zL{AD>G7%WY*Y=}|(r@ss;X?mRo{T_^D%a`$W7q6M+laR=SbE7UuFrSjwM; zXJRW6667ypreDI$xfB`hvv7F@-bEL01O50GzD>Ln1Wr))QPWC1PMM#&W@@|x8t)_} zblk<;Ir#)nI{TmCs>C3Uz-yRJME{NX4!kx0M3Pvt#d9;$1kaN;(jALqjngkJ%*_!8 zv?XQG)^o|jRC-wTotUQESaRtwzPH5#@JWxQPm-n8WNGXN!+3f^VhM&$w>zHI1Tcx= zHg?p0G-CiOh}2rnJNfI?!l1^>^ZLi+qHwMsKhgYL%)%pF^l7hWz&Fa08}Mkim=6CG zKjZ4SW{7)99&!DIiOta8HiXb#zjDj{=qn5EE1T*x<^l`*m2A2eAueJbzmG$yBBUqa z=QIc0HOno&ak}c1+=aZ>ExwF^IN$`U2ENx3cWNio!iRtk**XSFUK-D^{!jGah#R~v z-K>5&g6Br?A`y@MmT0qObu1ad?*irG_XjSGg|@Q%`P;%H_!ArE2>zl!|0cn4UQZ|U za_~3yJY0?WxQ0+)OHy7(|Mu~XaRdJ+tWlHp!s;p M^I72^`i@ur13P9c761SM literal 0 HcmV?d00001 diff --git a/Project/bin/yelpDataProcessing/reviewJson.class b/Project/bin/yelpDataProcessing/reviewJson.class new file mode 100644 index 0000000000000000000000000000000000000000..86e2bfbd6b996753be355729cce886a4a038cacf GIT binary patch literal 4854 zcma)9d3+RS8GgP!GLy|fl7&##5=qk*a*!--AxIJsLO8bMpag?ytuWc0gbkbBbaxg) zTdTFLT94X#f>s4l<57=N-6V~+s1@2;tyHSD_0}G0wfe96BR=2EY_i!6D!*UYo$q+R z_kE7{n|<=a!;b*C27goF5~w_4#75e6Q{R?I9~- zkD26=EgQK@Z4Jkw@u;~`z+F?@E8y9Zh!_eM2$UC`;)8}#4Q22#n^tREF$DY?yeJYV z?TE&WuJmxfk=&#A$C!t|BN5hPy?Qb#pRGY3+$iNS=7A_nS>Ewqp6K&~%0V8Qdb{l%cNGhme z&e^$b(U@V6`>-67N6VC6i~_a>vJj9!};Jv&}6aEWzl z4m)k>XpAYTXdp@Hl#y(VM3X)Qag~DAbM(=3BxM@I8m`7Q0wsfnNmY#)N%M%niur&l z#LkDcSf`+AZdgks8s32Q0?HvhmNvQv@#WnLLRfB3DR$#lN zaK{`9Z7xWm8=|UC#ZCd?lWRLQbRi(%JuF$20@$VDI_tP3RUQ0@9|-erqXL84b)gg5e+GWB#)kDC^U z3JwtNtZ8igv$h$Oa3935f*9L_Tr+A5LH0QfaU=w^Fkv(urwP;+y2`>OXQSh`65Is( zoXAz=MYb1cFp*|m3?Iq%(10V0oM?`QRS;s2d1S;W!4ce|;O4@(_EHVE;%#h~blU&6 
zgA-lB+Z~W{PVSYSD#2}NQ}Ir!x7v0FAzO<(HQa@F(G|u`X+1{W%;|Yf)Y|J+yoUno zPxA_)y7omQ3f^0ow@pe3?#BByd_YRKn919tvZsGg!@cq_HJpf>dNiJr8GJ~?hviZa z`@nE98_!2Id<-9F;fXX0ub5HmE?y=wn4u51s5r(jTcixDnyaKYKPl~fzreDZP@yAd zP5fybSMZsfmYow-!wG3XWkenni(5CLY}f3X7m)^+B%E8bV2Ohr3+pFd?AG})ELv!~ zBEo8|CldDb4>Oe8gYO~cnEyly?h)eq8}U5Uo3#6T67 zS}19FNW(YrE#{Pp-b^F&DyD^91DgrRNZPskWE(XT@>WtYDNq$`Y@Bnd(^&UKxK1`# z6;m|bY&s3{7E$pXvTs+$vsFAwVC4jpRq>vU-cz;3}-bw zjwh(ZNFvRP-a^o-qTPod;ztUensc?ZTdU;N@?$CN_FQ4ua}?4~n3z1<(a|$fM?cNG z9kM>KwODw2GR(75!R&o1eoji|H(B*Bh_274;ss{O1E|@g5!2}7@iu^50tSy?o084NQ7Zx*6 zfPr7f?;g%vD$f_&nU9wvw@#t5Zwkx%81AxhT;4H-6@0GYUtOPn)dU(Rux1?1{+0|j zWUzT0TgS1j)6?V`LtTZ(A3BNRJ3V3ym8;~A250rP_Ua5eI9Z=TcVE{T__%)eiRq_S zWzfU1UYWhyD)?#|8Cb?LE78RF!S$#@E3w>yYJ_kF_Tx%~IhH~V4x<*gu=h!DHWDh< zfW3G#Kg$q9AFgMOP1uGT@D^6xij}yLvu;FrlKpKx0WZ$ zGs|h0#cU&y50bIf=;k~0b^MjK2Wtq~S`3k;JFpJ-u~Qu-<0sIJ2hf5uB;yILdj^~F z3fk~0+4?)$Exrc`buGK0WEqV3S&L=Cm}!e!7oMhKB+D+Ib=+dv&C?EWxm_gZ!#(yvL6WYPWx(dL>APR&3*3w1S926oWsA(pU{ z&836j?qpBzB6!_|VV4D89U)kX?Z{YmsUi5bUFz8qY|vV9x$|v5BbLK;0rgp9toe*}|26>%_!!nd+k3e0ks0Ne1WM#SEuh)^7TEhv%Y6$9_I+<=-@?Gc(|D}8G*CK; zGXfKMvboIO`n23ymchBXN1mVAS{^8uNBRSP`^ZK1zMsi`r1G5qIU9!DHhK2K1=j%2TOfp#=!7X^w3}?d=TRX^ z&$eH2qob!LvCe6~oA7T-Pj1mhRXXigtZ~}!2C*8yWS(AT`kH0YBIf&&Wl@D$KWAA~ zW$x!Jix#tj$1RKcSi^DriUuxWCCBh64Xm-Ad+}?=Tfoc?;5VFI$hvg=7QbWbewjZX ze~&+)jP*6(kDM)Ml`HTf_xf3DC7$KpMV9#+rTI6ZD64(zQs9HU1pbvX3}Fq@7B4Y= z-Yviu entity_ids; // List of entities participating in Relation ' relation_id ' + boolean truth; // Truth Value for this data element + + public Cell(){ + entity_ids = new ArrayList(); + } + + public Cell(String r, String e1, String e2, boolean t){ + entity_ids = new ArrayList(); + relation_id = r; + entity_ids.add(e1); + entity_ids.add(e2); + truth = t; + } + +} diff --git a/Project/src/logisticCMF/Eval.java b/Project/src/logisticCMF/Eval.java new file mode 100644 index 0000000..5fd2542 --- /dev/null +++ b/Project/src/logisticCMF/Eval.java @@ -0,0 +1,157 @@ +package logisticCMF; + +import 
java.io.IOException; +import java.math.BigDecimal; +import java.math.RoundingMode; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; + +public class Eval { + static Map relTrue = new HashMap(); // Correct predictions per relation + static Map relCount = new HashMap(); // Total test size per relation + static Map relL2 = new HashMap(); + + static Map relActualTruth = new HashMap(); // Actual Truth values in each relation + static Map relPredTruth = new HashMap(); // Truth predicted in each relation + static Map relTruthCorrect = new HashMap(); // Correct Truth predictions in each relation + + static Map> relEvalMap = new HashMap>(); //Map [Rel,] Also contains "weighted average" as relation + + + public static Map> getEvalMap(data Data, embeddings e, String set){ + refreshMaps(); + if(set.equals("test")){ + for(Cell cell : Data.testData) + updateTestMaps(cell, e); + } + else{ + for(Cell cell : Data.valData) + updateTestMaps(cell, e); + } + makeRelationEvalMap(); // Final Map [Relation, ] + return relEvalMap; + } + + public static void refreshMaps(){ + relTrue = new HashMap(); + relCount = new HashMap(); + relL2 = new HashMap(); + relActualTruth = new HashMap(); // Actual Truth values in each relation + relPredTruth = new HashMap(); // Truth predicted in each relation + relTruthCorrect = new HashMap(); // Correct Truth predictions in each relation + relEvalMap = new HashMap>(); + } + + public static void addInRelCountMap(Cell cell){ + if(!relCount.containsKey(cell.relation_id)) + relCount.put(cell.relation_id, 1); + else + relCount.put(cell.relation_id, relCount.get(cell.relation_id)+1); + } + + public static void addInprfRelationMap(String relation, Integer actual, Integer pred, Integer correct){ + if(!relActualTruth.containsKey(relation)){ + relActualTruth.put(relation, 0); + relPredTruth.put(relation, 0); + relTruthCorrect.put(relation, 0); + } + + else{ + if(!(actual == 0 && pred == 0)){ + relActualTruth.put(relation, 
relActualTruth.get(relation) + actual); + relPredTruth.put(relation, relPredTruth.get(relation) + pred); + relTruthCorrect.put(relation, relTruthCorrect.get(relation) + correct); + } + } + } + + public static void addInRelAccuracyMap(Cell cell, Integer correct){ + if(!relTrue.containsKey(cell.relation_id)) + relTrue.put(cell.relation_id, correct); + else + relTrue.put(cell.relation_id, relTrue.get(cell.relation_id)+correct); + } + + public static void addInRelL2Map(Cell cell, double l2){ + if(!relL2.containsKey(cell.relation_id)) + relL2.put(cell.relation_id, l2); + else + relL2.put(cell.relation_id, relTrue.get(cell.relation_id) + l2); + } + + //Map [Rel, ] + public static void makeRelationEvalMap(){ + //makeTestMaps(Data, e); + double f = 0; double wf1=0.0, waccuracy=0.0, wp = 0.0, wr = 0.0; int total = 0; + for(String rel : relActualTruth.keySet()){ + double accuracy = ((double)relTrue.get(rel))/relCount.get(rel); + double precision = (double)relTruthCorrect.get(rel) / relPredTruth.get(rel) ; + double recall = (double) relTruthCorrect.get(rel) / relActualTruth.get(rel) ; + double f1 = 2*precision*recall / (precision + recall) ; + //System.out.println("a : " + accuracy + " p : " + precision + " r : " + recall + " f1 : " +f1); + relEvalMap.put(rel, new ArrayList()); + relEvalMap.get(rel).add(round(accuracy, 3)); // Accuracy + relEvalMap.get(rel).add(round(precision, 3)); // Precision + relEvalMap.get(rel).add(round(recall, 3)); // Recall + relEvalMap.get(rel).add(round(f1, 3)); // F1 + wf1 += (relCount.get(rel)*f1); + wp += (relCount.get(rel)*precision); + wr += (relCount.get(rel)*recall); + waccuracy += (relCount.get(rel)*accuracy); + total += relCount.get(rel); + } + wf1 = round(wf1/total, 3); waccuracy = round(waccuracy/total, 3); wp = round(wp/total, 3); wr = round(wr/total, 3); + relEvalMap.put("average", new ArrayList()); + relEvalMap.get("average").add(round(waccuracy, 3)); // Accuracy + relEvalMap.get("average").add(round(wp, 3)); // Precision + 
relEvalMap.get("average").add(round(wr, 3)); // Recall + relEvalMap.get("average").add(round(wf1, 3)); // F1 + + } + + public static void updateTestMaps(Cell cell, embeddings e){ + int correct = 0; int t =0, f=0; int c = 0; double l2Sum = 0.0; + double dot = e.dot(cell, learner.enableBias, e.K, learner.ealpha, learner.onlyAlpha); + double sigmdot = learner.sigm(dot); + int pred = (sigmdot >= 0.5) ? 1 : 0; + int truth = (cell.truth) ? 1 : 0; + double l2 = (truth - sigmdot)*(truth-sigmdot); + l2Sum += l2; + //System.out.println(sigmdot + " " + pred + " " + truth + " " + l2); + + if(pred == truth) + correct = 1; + else + correct = 0; + c += correct; + //System.out.println("rel : " + truth + " pred : " + pred); + //System.out.println(cell.relation_id + " : " + pred); + addInprfRelationMap(cell.relation_id, truth, pred, correct); + addInRelAccuracyMap(cell, correct); + addInRelCountMap(cell); + addInRelL2Map(cell, l2); + } + + public static void printEval(){ + for(String rel : relEvalMap.keySet()){ + ArrayList eval = relEvalMap.get(rel); + System.out.print(rel + " : "); + System.out.println("P : " + eval.get(1) + " R : " + eval.get(2) + " F1 : " + eval.get(3) + " Accuracy : " + eval.get(0)); + } + } + + + public static double round(double value, int places) { + if (places < 0) throw new IllegalArgumentException(); + if(Double.isNaN(value)) + return Double.NaN; + + BigDecimal bd = new BigDecimal(value); + bd = bd.setScale(places, RoundingMode.HALF_UP); + return bd.doubleValue(); + } + + + +} diff --git a/Project/src/logisticCMF/Rating.java b/Project/src/logisticCMF/Rating.java new file mode 100644 index 0000000..00d1b26 --- /dev/null +++ b/Project/src/logisticCMF/Rating.java @@ -0,0 +1,177 @@ +package logisticCMF; +import java.io.*; +import java.util.*; + + + +public class Rating { + + public Map> ratings; // Map[busId, Map[User, Rating]] + public String loc; + data [] busRate = new data[4]; + + public Rating(String folder, double valp, double testp) throws IOException{ 
+ ratings = new HashMap>(); + loc = folder; + busRate[0] = new data(); + busRate[1] = new data(); + busRate[2] = new data(); + busRate[3] = new data(); + readRatingData(folder); + makeTwoRatingDataCells(valp, testp); + /*for(data rd : busRate){ + rd.dataStats(); + }*/ + } + + public void testRatings(){ + data mergeData = new data(); + ArrayList tomerge = new ArrayList(); + for(data d : busRate) + tomerge.add(d); + mergeData.addDataAfterSplit(tomerge); + mergeData.dataStats(); + codeTest.learnAndTest(mergeData, 30, false, 0, false, 0); + } + + public void readRatingData(String folder) throws NumberFormatException, IOException{ + String address = System.getProperty("user.dir")+"/../Dataset/data/"+ folder +"/reviews.txt"; + BufferedReader br = new BufferedReader(new FileReader(address)); + String line; int countl = 0, countcell = 0; + String busid = null, userid = null; boolean value = false; + while((line = br.readLine()) != null){ + countl++; + String[] array = line.split(":"); + if( array[0].trim().equals("bus_id")){ + busid = array[1].trim(); + if(!ratings.containsKey(busid)) + ratings.put(busid, new HashMap()); + } + if( array[0].trim().equals("user_id")) + userid = array[1].trim(); + if( array[0].trim().equals("star")){ + double t = Double.parseDouble(array[1].trim()); + /*if(ratings.get(busid).containsKey(userid)){ + int r = ratings.get(busid).get(userid); + System.out.println("user already exists " + busid + " " + userid); + + }*/ + ratings.get(busid).put(userid, (int)t); + countcell++; + } + } + br.close(); + //System.out.println("Ratings : " + countcell); + + } + + public void makeTwoRatingDataCells(double valPerc, double testPerc){ + String relation = "busrate-"+loc+"-"; + int count2 = 0; int count = 0; + // Read all rating data from Map and create busRate data object for threshold = 2 + for(String bus : ratings.keySet()){ + for(String user : ratings.get(bus).keySet()){ + int rate = ratings.get(bus).get(user); + count++; + Cell cell = new Cell(); + 
cell.relation_id = relation+"2"; + cell.entity_ids.add(bus); + cell.entity_ids.add(user); + if(rate >= 2){ + cell.truth = true; + busRate[0].Data.add(cell); + } + else{ + cell.truth = false; + busRate[0].Data.add(cell); + } + } + } + busRate[0].splitTrainTestValidation(valPerc, testPerc); + //busRate[0].dataStats(); + makeRestRatingDataCells(); + + System.out.println("Ratings : " + busRate[0].Data.size()); + } + + public void makeRestRatingDataCells(){ + String r = "busrate-"+loc+"-"; + for(Cell cell : busRate[0].trainData){ + String b = cell.entity_ids.get(0), u = cell.entity_ids.get(1); + int rate = ratings.get(b).get(u); + if(rate <= 2) + addCellsinRestTrain(r, b, u, false, false, false); + + if(rate == 3) + addCellsinRestTrain(r, b, u, true, false, false); + + if(rate == 4) + addCellsinRestTrain(r, b, u, true, true, false); + + if(rate == 5) + addCellsinRestTrain(r, b, u, true, true, true); + } + + for(Cell cell : busRate[0].testData){ + String b = cell.entity_ids.get(0), u = cell.entity_ids.get(1); + int rate = ratings.get(b).get(u); + if(rate <= 2) + addCellsinRestTest(r, b, u, false, false, false); + if(rate == 3) + addCellsinRestTest(r, b, u, true, false, false); + if(rate == 4) + addCellsinRestTest(r, b, u, true, true, false); + if(rate == 5) + addCellsinRestTest(r, b, u, true, true, true); + } + + for(Cell cell : busRate[0].valData){ + String b = cell.entity_ids.get(0), u = cell.entity_ids.get(1); + int rate = ratings.get(b).get(u); + if(rate <= 2) + addCellsinRestVal(r, b, u, false, false, false); + if(rate == 3) + addCellsinRestVal(r, b, u, true, false, false); + if(rate == 4) + addCellsinRestVal(r, b, u, true, true, false); + if(rate == 5) + addCellsinRestVal(r, b, u, true, true, true); + } + + } + + public void addCellsinRestTrain(String r, String b, String u, boolean t3, boolean t4, boolean t5){ + busRate[1].Data.add(new Cell(r+3, b, u, t3)); + busRate[1].trainData.add(new Cell(r+3, b, u, t3)); + + busRate[2].Data.add(new Cell(r+4, b, u, t4)); + 
busRate[2].trainData.add(new Cell(r+4, b, u, t4));
+
+        busRate[3].Data.add(new Cell(r+5, b, u, t5));
+        busRate[3].trainData.add(new Cell(r+5, b, u, t5));
+    }
+
+    // Adds one observation to the test split as three cells: (r+3, t3) goes to busRate[1],
+    // (r+4, t4) to busRate[2] and (r+5, t5) to busRate[3]. r is a String, so r+3 is string
+    // concatenation. Each cell is created twice: one instance for Data, one for testData.
+    public void addCellsinRestTest(String r, String b, String u, boolean t3, boolean t4, boolean t5){
+        busRate[1].Data.add(new Cell(r+3, b, u, t3));
+        busRate[1].testData.add(new Cell(r+3, b, u, t3));
+
+        busRate[2].Data.add(new Cell(r+4, b, u, t4));
+        busRate[2].testData.add(new Cell(r+4, b, u, t4));
+
+        busRate[3].Data.add(new Cell(r+5, b, u, t5));
+        busRate[3].testData.add(new Cell(r+5, b, u, t5));
+    }
+
+    // Same as addCellsinRestTest, but the second copy of each cell goes to valData.
+    public void addCellsinRestVal(String r, String b, String u, boolean t3, boolean t4, boolean t5){
+        busRate[1].Data.add(new Cell(r+3, b, u, t3));
+        busRate[1].valData.add(new Cell(r+3, b, u, t3));
+
+        busRate[2].Data.add(new Cell(r+4, b, u, t4));
+        busRate[2].valData.add(new Cell(r+4, b, u, t4));
+
+        busRate[3].Data.add(new Cell(r+5, b, u, t5));
+        busRate[3].valData.add(new Cell(r+5, b, u, t5));
+    }
+
+}
+
diff --git a/Project/src/logisticCMF/Util.java b/Project/src/logisticCMF/Util.java
new file mode 100644
index 0000000..4e4ef05
--- /dev/null
+++ b/Project/src/logisticCMF/Util.java
@@ -0,0 +1,156 @@
+package logisticCMF;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+
+
+/*import org.jfree.chart.ChartFactory;
+import org.jfree.chart.ChartFrame;
+import org.jfree.chart.JFreeChart;
+import org.jfree.chart.axis.ValueAxis;
+import org.jfree.chart.plot.PlotOrientation;
+import org.jfree.chart.plot.XYPlot;
+import org.jfree.data.xy.XYSeries;
+import org.jfree.data.xy.XYSeriesCollection;*/
+import java.util.*;
+
+//import postProcessing.data;
+
+
+// Static helpers for choosing cold-start entities, sizing negative samples and printing
+// sanity checks about a train/validation/test split.
+// NOTE(review): generic type arguments are missing throughout this copy of the file.
+public class Util {
+
+    // Fixed seed, so the shuffle in getColdEntites is reproducible from run to run.
+    static Random seed = new Random(100);
+
+    // Returns a random subset of the distinct entity ids found at position `index` of the
+    // cells in `data`. `perc` is a percentage (0-100); the subset holds the first
+    // (int)(perc * distinctCount / 100.0) ids after a seeded shuffle.
+    public static HashSet getColdEntites(ArrayList data, int index, double perc){
+        HashSet entities = new HashSet();
+        for(Cell cell : data)
+            entities.add(cell.entity_ids.get(index));
+
+        ArrayList ents = new ArrayList(entities);
+        Collections.shuffle(ents, seed);
+        HashSet coldEntities = new HashSet(ents.subList(0, (int)(perc*entities.size()/100.0)));
+        return coldEntities;
+
+    }
+
+    // Number of cells divided by the number of distinct index-0 entities, truncated to int.
+    // NOTE(review): the division is done in double, so an empty index-0 entity set does not
+    // throw; the quotient is NaN or Infinity before the cast.
+    public static int getNegSampleSize(data Data){
+        return (int)(((double)Data.Data.size())/Data.entityIds.get(0).size());
+    }
+
+    // Prints the distinct entity counts at index 0 ("rows") and index 1 ("cols") and the
+    // number of cells, then returns cells / rows as a double.
+    public static double getMatrixDetails(data Data){
+        System.out.println("rows : " + Data.entityIds.get(0).size());
+        System.out.println("cols : " + Data.entityIds.get(1).size());
+        System.out.println("size : " + Data.Data.size());
+        return ((double)Data.Data.size())/Data.entityIds.get(0).size();
+
+    }
+
+    // NOTE(review): text was lost on the `for` line below. The rest of the commented-out
+    // plotGraph, its closing comment terminator and the header of the next method are gone.
+    // Call sites elsewhere in this patch name that method Util.checkColdStartSanity and pass
+    // it a `data` argument, which the surviving body refers to as D.
+    /*public static void plotGraph(ArrayList x, ArrayList y, String filename) throws IOException{
+        XYSeries series = new XYSeries(filename);
+        for(int i =0; i entities = new HashSet();
+        // First pass, index 0: gather every index-0 entity seen in train or validation.
+        int index = 0, flag = 0;
+        for(Cell cell : D.trainData){
+            entities.add(cell.entity_ids.get(index));
+        }
+        for(Cell cell : D.valData){
+            entities.add(cell.entity_ids.get(index));
+        }
+        // A test cell whose entity was already seen means index 0 is not a cold-start split.
+        for(Cell cell : D.testData){
+            if(entities.contains(cell.entity_ids.get(index))){
+                System.out.print("WRONG COLD START SPLIT, Index : " + index + ", ");
+                flag = 1;
+                break;
+            }
+        }
+        if(flag == 0){
+            System.out.print("Index ColdStart : " + index + ", ");
+        }
+
+        // Second pass, index 1: same check.
+        // NOTE(review): the overlap message here is "NO COLD START SPLIT", while the index-0
+        // pass prints "WRONG COLD START SPLIT" for the same condition; confirm which is meant.
+        index = 1; flag = 0;
+        entities = new HashSet();
+        for(Cell cell : D.trainData){
+            entities.add(cell.entity_ids.get(index));
+        }
+        for(Cell cell : D.valData){
+            entities.add(cell.entity_ids.get(index));
+        }
+        for(Cell cell : D.testData){
+            if(entities.contains(cell.entity_ids.get(index))){
+                System.out.println("NO COLD START SPLIT, Index : " + index);
+                flag = 1;
+                break;
+            }
+        }
+        if(flag == 0)
+            System.out.println("Index ColdStart : " + index);
+
+    }
+
+    // Prints how many distinct entities occur at index 0 (e1) and index 1 (e2) of `data`.
+    public static void countEntities(ArrayList data){
+        HashSet e1 = new HashSet();
+        HashSet e2 = new HashSet();
+
+        for(Cell cell : data){
+            e1.add(cell.entity_ids.get(0));
+            e2.add(cell.entity_ids.get(1));
+        }
+
+        System.out.println("e1 : " + e1.size() + " e2 : "+e2.size());
+
+    }
+
+    // Prints the distinct entity counts of the train and test cells, then how many test
+    // entities never occur in train ("cold"), separately for index 0 and index 1.
+    public static void implicitColdStart(ArrayList trdata, ArrayList testData){
+        HashSet tre1 = new HashSet();
+        HashSet tre2 = new HashSet();
+
+        for(Cell cell : trdata){
+            tre1.add(cell.entity_ids.get(0));
+            tre2.add(cell.entity_ids.get(1));
+        }
+
+        System.out.println("tre1 : " + tre1.size() + " tre2 : "+ tre2.size());
+
+        HashSet tee1 = new HashSet();
+        HashSet tee2 = new HashSet();
+        for(Cell cell : testData){
+            tee1.add(cell.entity_ids.get(0));
+            tee2.add(cell.entity_ids.get(1));
+        }
+
+        System.out.println("tee1 : " + tee1.size() + " tee2 : "+ tee2.size());
+        // Set difference: what remains are the test entities that are absent from train.
+        tee1.removeAll(tre1);
+        tee2.removeAll(tre2);
+
+
+        System.out.println("e1 cold : " + tee1.size() + " e2 cold : " + tee2.size() );
+
+    }
+}
diff --git a/Project/src/logisticCMF/codeTest.java b/Project/src/logisticCMF/codeTest.java
new file mode 100644
index 0000000..e3460a1
--- /dev/null
+++ b/Project/src/logisticCMF/codeTest.java
@@ -0,0 +1,676 @@
+package logisticCMF;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Random;
+
+// Experiment driver: reads the relation files of one dataset folder, trains embeddings on
+// several combinations of relations and writes predictions and embeddings to disk.
+public class codeTest {
+
+    // NOTE(review): unseeded, and not referenced by any method visible in this class.
+    static Random rand = new Random();
+
+    private static final long MEGABYTE = 1024L * 1024L;
+    // Integer division: the result is rounded down to whole megabytes.
+    public static long bytesToMegabytes(long bytes) {
+        return bytes / MEGABYTE;
+    }
+    // Prints the JVM's total, free and used (total - free) heap in megabytes.
+    public static void getMemoryDetails(){
+        // Get the Java runtime
+        Runtime runtime = Runtime.getRuntime();
+        // Run the garbage collector
+        //runtime.gc();
+        // Calculate the used memory
+        System.out.println("Total memory : " + bytesToMegabytes(runtime.totalMemory()) + " Free memory : " + bytesToMegabytes(runtime.freeMemory())
+                + " Used memory is megabytes: " + bytesToMegabytes(runtime.totalMemory() - runtime.freeMemory()));
+
+    }
+
+    // Input Data, that is split in Train, Validation and Test. Input K. Learn and stop on convergence. Then test with learnt no. of epochs. 
+ public static embeddings learnAndTest(data Data, int K, boolean busWord, int busWordNegSamSize, boolean userWord, int userWordNegSamSize){ + embeddings e = new embeddings(Data, K); + learner l = new learner(); + embeddings eBest = l.learnAndStop1(Data, e, false, false, false, busWord, busWordNegSamSize, userWord, userWordNegSamSize); + System.out.println("learning done"); + Eval.getEvalMap(Data, eBest, "test"); + Eval.printEval(); + return eBest; + } + + public static data readAttributes(String folder, double valPerc, double testPerc, boolean coldStart, int index) throws IOException{ + data att = new data(); + att.readBusAtt(System.getProperty("user.dir")+"/../Dataset/data/"+ folder +"/busAtt.txt", folder); + if(coldStart) + att.splitColdStart(valPerc, testPerc, index); + else + att.splitTrainTestValidation(valPerc, testPerc); + + return att; + } + + public static data readCategories(String folder, int pruneThresh) throws IOException{ + data cat = new data(); + cat.readAndCompleteCategoryData(System.getProperty("user.dir")+"/../Dataset/data/"+ folder +"/busCat.txt", pruneThresh, folder); + cat.splitTrainTestValidation(0.0, 0.0); + return cat; + } + + public static data readRatings(String folder, double valPerc, double testPerc, boolean coldStart, int index) throws IOException{ + data rate = new data(); + rate.readRatingData(System.getProperty("user.dir")+"/../Dataset/data/"+ folder +"/reviews.txt", folder); + if(coldStart) + rate.splitColdStart(valPerc, testPerc, index); + else + rate.splitTrainTestValidation(valPerc, testPerc); + return rate; + } + + public static data readReviewData(String folder, int occThresh, boolean busWord, boolean userWord, double valPerc, double testPerc) throws IOException{ + data rD = new data(); + rD.readReviewData(System.getProperty("user.dir")+"/../Dataset/data/"+ folder +"/reviews_textProc.txt"); // Makes EnWord maps (Business and User Word maps) and Word-Count map + rD.pruneVocab_EntityMap(occThresh); + if(busWord) + 
rD.makeEnWordCells("b-word"); + if(userWord) + rD.makeEnWordCells("u-word"); + rD.splitTrainTestValidation(valPerc, testPerc); + return rD; + } + + public static void completeEvaluation(String folder, data A, data C, data R, data W, boolean busWord, int bwNS, boolean userWord, int uwNS, + boolean attCold, int coldIndexAtt, boolean rateCold, int coldIndexRate, String folderToWriteData) throws IOException{ + + System.out.print("Attribute Cold Start, "); + Util.checkColdStartSanity(A); + Util.implicitColdStart(A.trainData, A.testData); + System.out.print("Rate Cold Start, "); + Util.checkColdStartSanity(R); + Util.implicitColdStart(R.trainData, R.testData); + + ArrayList tomerge = new ArrayList(); + data mergeData = new data(); + + getMemoryDetails(); + + System.out.println("################################################### "+folder+" - Att-Word ######################################"); + tomerge.clear(); + tomerge.add(A); + tomerge.add(W); + mergeData = new data(); + mergeData.busWord = W.busWord; + mergeData.words = W.words; + mergeData.wordCount = W.wordCount; + mergeData.userWord = W.userWord; + mergeData.addDataAfterSplit(tomerge); + mergeData.dataStats(); + embeddings e = learnAndTest(mergeData, 30, busWord, bwNS, userWord, uwNS); + if(!(busWord || userWord)) + writeDataToFile.writePrediction(folderToWriteData+"A-A", folder, A, e); + if(busWord && !userWord) + writeDataToFile.writePrediction(folderToWriteData+"A-A+BW", folder, A, e); + if(userWord && !busWord) + writeDataToFile.writePrediction(folderToWriteData+"A-A+UW", folder, A, e); + if(userWord && busWord) + writeDataToFile.writePrediction(folderToWriteData+"A-A+BUW", folder, A, e); + getMemoryDetails(); + + System.out.println("################################################### "+folder+" - Att-Cat-Word ######################################"); + //rD = readReviewData(folder, 10, busWord, false, 0.0, 0.0); + tomerge = new ArrayList(); + tomerge.clear(); + tomerge.add(A); + tomerge.add(C); + 
tomerge.add(W); + mergeData = new data(); + mergeData.busWord = W.busWord; + mergeData.words = W.words; + mergeData.wordCount = W.wordCount; + mergeData.userWord = W.userWord; + mergeData.addDataAfterSplit(tomerge); + mergeData.dataStats(); + e = learnAndTest(mergeData, 30, busWord, bwNS, userWord, uwNS); + if(!(busWord || userWord)) + writeDataToFile.writePrediction(folderToWriteData+"A-A+C", folder, A, e); + if(busWord && !userWord) + writeDataToFile.writePrediction(folderToWriteData+"A-A+C+BW", folder, A, e); + if(userWord && !busWord) + writeDataToFile.writePrediction(folderToWriteData+"A-A+C+UW", folder, A, e); + if(busWord && userWord) + writeDataToFile.writePrediction(folderToWriteData+"A-A+C+BUW", folder, A, e); + System.gc(); + + System.out.println("################################################### "+folder+" - Rating - Word ######################################"); + tomerge = new ArrayList(); + tomerge.clear(); + tomerge.add(R); + tomerge.add(W); + mergeData = new data(); + mergeData.busWord = W.busWord; + mergeData.words = W.words; + mergeData.wordCount = W.wordCount; + mergeData.userWord = W.userWord; + mergeData.addDataAfterSplit(tomerge); + mergeData.dataStats(); + e = learnAndTest(mergeData, 30, busWord, bwNS, userWord, uwNS); + if(!(busWord || userWord)) + writeDataToFile.writePrediction(folderToWriteData+"R-R", folder, R, e); + if(busWord && !userWord) + writeDataToFile.writePrediction(folderToWriteData+"R-R+BW", folder, R, e); + if(userWord && !busWord) + writeDataToFile.writePrediction(folderToWriteData+"R-R+UW", folder, R, e); + if(busWord && userWord) + writeDataToFile.writePrediction(folderToWriteData+"R-R+BUW", folder, R, e); + System.gc(); + + System.out.println("################################################### "+folder+" - Rating - Cat - Word ######################################"); + tomerge = new ArrayList(); + tomerge.clear(); + tomerge.add(R); + tomerge.add(C); + tomerge.add(W); + mergeData = new data(); + mergeData.busWord = 
W.busWord; + mergeData.words = W.words; + mergeData.wordCount = W.wordCount; + mergeData.userWord = W.userWord; + mergeData.addDataAfterSplit(tomerge); + mergeData.dataStats(); + e = learnAndTest(mergeData, 30, busWord, bwNS, userWord, uwNS); + if(!(busWord || userWord)) + writeDataToFile.writePrediction(folderToWriteData+"R-R+C", folder, R, e); + if(busWord && !userWord) + writeDataToFile.writePrediction(folderToWriteData+"R-R+C+BW", folder, R, e); + if(userWord && !busWord) + writeDataToFile.writePrediction(folderToWriteData+"R-R+C+UW", folder, R, e); + if(busWord && userWord) + writeDataToFile.writePrediction(folderToWriteData+"R-R+C+BUW", folder, R, e); + System.gc(); + + System.out.println("################################################### "+folder+" - Att Rate - Word ######################################"); + tomerge = new ArrayList(); + tomerge.clear(); + tomerge.add(A); + tomerge.add(R); + tomerge.add(W); + mergeData = new data(); + mergeData.busWord = W.busWord; + mergeData.words = W.words; + mergeData.wordCount = W.wordCount; + mergeData.userWord = W.userWord; + mergeData.addDataAfterSplit(tomerge); + mergeData.dataStats(); + e = learnAndTest(mergeData, 30, busWord, bwNS, userWord, uwNS); + if(!(busWord || userWord)){ + writeDataToFile.writePrediction(folderToWriteData+"A-A+R", folder, A, e); + writeDataToFile.writePrediction(folderToWriteData+"R-A+R", folder, R, e); + } + if(busWord && !userWord){ + writeDataToFile.writePrediction(folderToWriteData+"A-A+R+BW", folder, A, e); + writeDataToFile.writePrediction(folderToWriteData+"R-A+R+BW", folder, R, e); + } + if(userWord && !busWord){ + writeDataToFile.writePrediction(folderToWriteData+"A-A+R+UW", folder, A, e); + writeDataToFile.writePrediction(folderToWriteData+"R-A+R+UW", folder, R, e); + } + if(userWord && busWord){ + writeDataToFile.writePrediction(folderToWriteData+"A-A+R+BUW", folder, A, e); + writeDataToFile.writePrediction(folderToWriteData+"R-A+R+BUW", folder, R, e); + } + System.gc(); + + + 
System.out.println("################################################### "+folder+" - Att Cat Rate Word ######################################"); + tomerge = new ArrayList(); + tomerge.clear(); + tomerge.add(A); + tomerge.add(R); + tomerge.add(C); + tomerge.add(W); + mergeData = new data(); + mergeData.busWord = W.busWord; + mergeData.words = W.words; + mergeData.wordCount = W.wordCount; + mergeData.userWord = W.userWord; + mergeData.addDataAfterSplit(tomerge); + mergeData.dataStats(); + e = learnAndTest(mergeData, 30, busWord, bwNS, userWord, uwNS); + if(!(busWord || userWord)){ + writeDataToFile.writePrediction(folderToWriteData+"A-A+C+R", folder, A, e); + writeDataToFile.writePrediction(folderToWriteData+"R-A+C+R", folder, R, e); + } + if(busWord && !userWord){ + writeDataToFile.writePrediction(folderToWriteData+"A-A+C+R+BW", folder, A, e); + writeDataToFile.writePrediction(folderToWriteData+"R-A+C+R+BW", folder, R, e); + } + if(userWord && !busWord){ + writeDataToFile.writePrediction(folderToWriteData+"A-A+C+R+UW", folder, A, e); + writeDataToFile.writePrediction(folderToWriteData+"R-A+C+R+UW", folder, R, e); + } + if(busWord && userWord){ + writeDataToFile.writePrediction(folderToWriteData+"A-A+C+R+BUW", folder, A, e); + writeDataToFile.writePrediction(folderToWriteData+"R-A+C+R+BUW", folder, R, e); + } + getMemoryDetails(); + + if(busWord || userWord){ + if(busWord && !userWord){ + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"words-bw", W, 1, e); + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"attributes-bw", A, 1, e); + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"categories-bw", C, 1, e); + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"business-bw", R, 0, e); + } + if(userWord && !busWord){ + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"words-uw", W, 1, e); + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"attributes-uw", A, 1, e); + writeDataToFile.writeEmbeddings(folder, 
folderToWriteData+"categories-uw", C, 1, e); + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"business-uw", R, 0, e); + } + if(userWord && busWord){ + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"words-buw", W, 1, e); + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"attributes-buw", A, 1, e); + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"categories-buw", C, 1, e); + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"business-buw", R, 0, e); + } + } + + } + + public static void AttBusColdCompleteEvaluation(String folder, data A, data C, data R, data W, boolean busWord, int bwNS, boolean userWord, int uwNS, String folderToWriteData) throws IOException{ + System.out.print("Attribute Cold Start, "); + Util.checkColdStartSanity(A); + Util.implicitColdStart(A.trainData, A.testData); + + ArrayList tomerge = new ArrayList(); + data mergeData = new data(); + + getMemoryDetails(); + + System.out.println("################################################### "+folder+" - Att-Word ######################################"); + tomerge.clear(); + tomerge.add(A); + tomerge.add(W); + mergeData = new data(W); + mergeData.addDataAfterSplit(tomerge); + mergeData.dataStats(); + embeddings e = learnAndTest(mergeData, 30, busWord, bwNS, userWord, uwNS); + if(!(busWord || userWord)) + writeDataToFile.writePrediction(folderToWriteData+"A-A", folder, A, e); + if(busWord && !userWord) + writeDataToFile.writePrediction(folderToWriteData+"A-A+BW", folder, A, e); + if(userWord && !busWord) + writeDataToFile.writePrediction(folderToWriteData+"A-A+UW", folder, A, e); + if(userWord && busWord) + writeDataToFile.writePrediction(folderToWriteData+"A-A+BUW", folder, A, e); + getMemoryDetails(); + + System.out.println("################################################### "+folder+" - Att-Cat-Word ######################################"); + tomerge = new ArrayList(); + tomerge.clear(); + tomerge.add(A); + tomerge.add(C); + tomerge.add(W); + 
mergeData = new data(W); + mergeData.addDataAfterSplit(tomerge); + mergeData.dataStats(); + e = learnAndTest(mergeData, 30, busWord, bwNS, userWord, uwNS); + if(!(busWord || userWord)) + writeDataToFile.writePrediction(folderToWriteData+"A-A+C", folder, A, e); + if(busWord && !userWord) + writeDataToFile.writePrediction(folderToWriteData+"A-A+C+BW", folder, A, e); + if(userWord && !busWord) + writeDataToFile.writePrediction(folderToWriteData+"A-A+C+UW", folder, A, e); + if(busWord && userWord) + writeDataToFile.writePrediction(folderToWriteData+"A-A+C+BUW", folder, A, e); + System.gc(); + + System.out.println("################################################### "+folder+" - Att Rate - Word ######################################"); + tomerge = new ArrayList(); + tomerge.clear(); + tomerge.add(A); + tomerge.add(R); + tomerge.add(W); + mergeData = new data(W); + mergeData.addDataAfterSplit(tomerge); + mergeData.dataStats(); + e = learnAndTest(mergeData, 30, busWord, bwNS, userWord, uwNS); + if(!(busWord || userWord)){ + writeDataToFile.writePrediction(folderToWriteData+"A-A+R", folder, A, e); + } + if(busWord && !userWord){ + writeDataToFile.writePrediction(folderToWriteData+"A-A+R+BW", folder, A, e); + } + if(userWord && !busWord){ + writeDataToFile.writePrediction(folderToWriteData+"A-A+R+UW", folder, A, e); + } + if(userWord && busWord){ + writeDataToFile.writePrediction(folderToWriteData+"A-A+R+BUW", folder, A, e); + } + System.gc(); + + + System.out.println("################################################### "+folder+" - Att Cat Rate Word ######################################"); + tomerge = new ArrayList(); + tomerge.clear(); + tomerge.add(A); + tomerge.add(R); + tomerge.add(C); + tomerge.add(W); + mergeData = new data(W); + mergeData.addDataAfterSplit(tomerge); + mergeData.dataStats(); + e = learnAndTest(mergeData, 30, busWord, bwNS, userWord, uwNS); + if(!(busWord || userWord)){ + writeDataToFile.writePrediction(folderToWriteData+"A-A+C+R", folder, A, e); + } 
+ if(busWord && !userWord){ + writeDataToFile.writePrediction(folderToWriteData+"A-A+C+R+BW", folder, A, e); + } + if(userWord && !busWord){ + writeDataToFile.writePrediction(folderToWriteData+"A-A+C+R+UW", folder, A, e); + } + if(busWord && userWord){ + writeDataToFile.writePrediction(folderToWriteData+"A-A+C+R+BUW", folder, A, e); + } + getMemoryDetails(); + + if(busWord || userWord){ + if(busWord && !userWord){ + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"words-bw", W, 1, e); + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"attributes-bw", A, 1, e); + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"categories-bw", C, 1, e); + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"business-bw", R, 0, e); + } + if(userWord && !busWord){ + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"words-uw", W, 1, e); + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"attributes-uw", A, 1, e); + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"categories-uw", C, 1, e); + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"business-uw", R, 0, e); + } + if(userWord && busWord){ + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"words-buw", W, 1, e); + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"attributes-buw", A, 1, e); + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"categories-buw", C, 1, e); + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"business-buw", R, 0, e); + } + } + + } + + public static void RateColdCompleteEvaluation(String folder, data A, data C, data R, data W, boolean busWord, int bwNS, boolean userWord, int uwNS, + String folderToWriteData) throws IOException{ + + System.out.print("Rate Cold Start, "); + Util.checkColdStartSanity(R); + Util.implicitColdStart(R.trainData, R.testData); + + ArrayList tomerge = new ArrayList(); + data mergeData = new data(); + + getMemoryDetails(); + + 
System.out.println("################################################### "+folder+" - Rating - Word ######################################"); + tomerge = new ArrayList(); + tomerge.clear(); + tomerge.add(R); + tomerge.add(W); + mergeData = new data(W); + mergeData.addDataAfterSplit(tomerge); + mergeData.dataStats(); + embeddings e = learnAndTest(mergeData, 30, busWord, bwNS, userWord, uwNS); + if(!(busWord || userWord)) + writeDataToFile.writePrediction(folderToWriteData+"R-R", folder, R, e); + if(busWord && !userWord) + writeDataToFile.writePrediction(folderToWriteData+"R-R+BW", folder, R, e); + if(userWord && !busWord) + writeDataToFile.writePrediction(folderToWriteData+"R-R+UW", folder, R, e); + if(busWord && userWord) + writeDataToFile.writePrediction(folderToWriteData+"R-R+BUW", folder, R, e); + System.gc(); + + System.out.println("################################################### "+folder+" - Rating - Cat - Word ######################################"); + tomerge = new ArrayList(); + tomerge.clear(); + tomerge.add(R); + tomerge.add(C); + tomerge.add(W); + mergeData = new data(W); + mergeData.addDataAfterSplit(tomerge); + mergeData.dataStats(); + e = learnAndTest(mergeData, 30, busWord, bwNS, userWord, uwNS); + if(!(busWord || userWord)) + writeDataToFile.writePrediction(folderToWriteData+"R-R+C", folder, R, e); + if(busWord && !userWord) + writeDataToFile.writePrediction(folderToWriteData+"R-R+C+BW", folder, R, e); + if(userWord && !busWord) + writeDataToFile.writePrediction(folderToWriteData+"R-R+C+UW", folder, R, e); + if(busWord && userWord) + writeDataToFile.writePrediction(folderToWriteData+"R-R+C+BUW", folder, R, e); + System.gc(); + + System.out.println("################################################### "+folder+" - Att Rate - Word ######################################"); + tomerge = new ArrayList(); + tomerge.clear(); + tomerge.add(A); + tomerge.add(R); + tomerge.add(W); + mergeData = new data(W); + mergeData.addDataAfterSplit(tomerge); + 
mergeData.dataStats(); + e = learnAndTest(mergeData, 30, busWord, bwNS, userWord, uwNS); + if(!(busWord || userWord)){ + writeDataToFile.writePrediction(folderToWriteData+"R-A+R", folder, R, e); + } + if(busWord && !userWord){ + writeDataToFile.writePrediction(folderToWriteData+"R-A+R+BW", folder, R, e); + } + if(userWord && !busWord){ + writeDataToFile.writePrediction(folderToWriteData+"R-A+R+UW", folder, R, e); + } + if(userWord && busWord){ + writeDataToFile.writePrediction(folderToWriteData+"R-A+R+BUW", folder, R, e); + } + System.gc(); + + + System.out.println("################################################### "+folder+" - Att Cat Rate Word ######################################"); + tomerge = new ArrayList(); + tomerge.clear(); + tomerge.add(A); + tomerge.add(R); + tomerge.add(C); + tomerge.add(W); + mergeData = new data(W); + mergeData.addDataAfterSplit(tomerge); + mergeData.dataStats(); + e = learnAndTest(mergeData, 30, busWord, bwNS, userWord, uwNS); + if(!(busWord || userWord)){ + writeDataToFile.writePrediction(folderToWriteData+"R-A+C+R", folder, R, e); + } + if(busWord && !userWord){ + writeDataToFile.writePrediction(folderToWriteData+"R-A+C+R+BW", folder, R, e); + } + if(userWord && !busWord){ + writeDataToFile.writePrediction(folderToWriteData+"R-A+C+R+UW", folder, R, e); + } + if(busWord && userWord){ + writeDataToFile.writePrediction(folderToWriteData+"R-A+C+R+BUW", folder, R, e); + } + getMemoryDetails(); + + if(busWord || userWord){ + if(busWord && !userWord){ + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"words-bw", W, 1, e); + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"attributes-bw", A, 1, e); + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"categories-bw", C, 1, e); + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"business-bw", R, 0, e); + } + if(userWord && !busWord){ + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"words-uw", W, 1, e); + 
writeDataToFile.writeEmbeddings(folder, folderToWriteData+"attributes-uw", A, 1, e); + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"categories-uw", C, 1, e); + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"business-uw", R, 0, e); + } + if(userWord && busWord){ + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"words-buw", W, 1, e); + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"attributes-buw", A, 1, e); + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"categories-buw", C, 1, e); + writeDataToFile.writeEmbeddings(folder, folderToWriteData+"business-buw", R, 0, e); + } + } + System.gc(); + } + + public static void performAttBusColdEvaluation(String folder) throws IOException{ + String folderToWriteData = "AttBusCold/"; + data A = readAttributes(folder, 15.0, 15.0, true, 0); + data C = readCategories(folder, 5); + data R = readRatings(folder, 0.0, 0.0, false, 1); + data W = new data(); + + AttBusColdCompleteEvaluation(folder, A, C, R, W, false, 0, false, 0, folderToWriteData); + System.gc(); + + // Business - Words + W = readReviewData(folder, 10, true, false, 0.0, 0.0); + int bwNS = Util.getNegSampleSize(W); + AttBusColdCompleteEvaluation(folder, A, C, R, W, true, bwNS, false, 0, folderToWriteData); + System.gc(); + + // User - Words + W = readReviewData(folder, 10, false, true, 0.0, 0.0); + int uwNS = Util.getNegSampleSize(W); + AttBusColdCompleteEvaluation(folder, A, C, R, W, false, 0, true, uwNS, folderToWriteData); + System.gc(); + + // BusWords and UserWords + /*W = readReviewData(folder, 10, true, true, 0.0, 0.0); + AttBusColdCompleteEvaluation(folder, A, C, R, W, true, bwNS, true, uwNS, folderToWriteData); + System.gc();*/ + } + + public static void performRateBusColdEvaluation(String folder) throws IOException{ + String folderToWriteData = "RateBusCold/"; + data A = readAttributes(folder, 0.0, 0.0, false, 0); + data C = readCategories(folder, 5); + data R = readRatings(folder, 15.0, 15.0, true, 
0); + data W = new data(); + + RateColdCompleteEvaluation(folder, A, C, R, W, false, 0, false, 0, folderToWriteData); + System.gc(); + + // Business - Words + W = readReviewData(folder, 10, true, false, 0.0, 0.0); + int bwNS = Util.getNegSampleSize(W); + RateColdCompleteEvaluation(folder, A, C, R, W, true, bwNS, false, 0, folderToWriteData); + System.gc(); + + // User - Words + W = readReviewData(folder, 10, false, true, 0.0, 0.0); + int uwNS = Util.getNegSampleSize(W); + RateColdCompleteEvaluation(folder, A, C, R, W, false, 0, true, uwNS, folderToWriteData); + System.gc(); + + // BusWords and UserWords + /*W = readReviewData(folder, 10, true, true, 0.0, 0.0); + RateColdCompleteEvaluation(folder, A, C, R, W, true, bwNS, true, uwNS, folderToWriteData); + System.gc();*/ + } + + public static void performRateUserColdEvaluation(String folder) throws IOException{ + String folderToWriteData = "RateUserCold/"; + data A = readAttributes(folder, 0.0, 0.0, false, 0); + data C = readCategories(folder, 5); + data R = readRatings(folder, 15.0, 15.0, true, 1); + data W = new data(); + + RateColdCompleteEvaluation(folder, A, C, R, W, false, 0, false, 0, folderToWriteData); + System.gc(); + + // Business - Words + W = readReviewData(folder, 10, true, false, 0.0, 0.0); + int bwNS = Util.getNegSampleSize(W); + RateColdCompleteEvaluation(folder, A, C, R, W, true, bwNS, false, 0, folderToWriteData); + System.gc(); + + // User - Words + W = readReviewData(folder, 10, false, true, 0.0, 0.0); + int uwNS = Util.getNegSampleSize(W); + RateColdCompleteEvaluation(folder, A, C, R, W, false, 0, true, uwNS, folderToWriteData); + System.gc(); + + // BusWords and UserWords + /*W = readReviewData(folder, 10, true, true, 0.0, 0.0); + RateColdCompleteEvaluation(folder, A, C, R, W, true, bwNS, true, uwNS, folderToWriteData); + System.gc();*/ + } + + public static void performHeldOutEvaluation(String folder) throws IOException{ + String folderToWriteData = "HeldOut/"; + data A = readAttributes(folder, 
15.0, 15.0, false, 0); + data C = readCategories(folder, 5); + data R = readRatings(folder, 15.0, 15.0, false, 1); + data W = new data(); + + // No Words + completeEvaluation(folder, A, C, R, W, false, 0, false, 0, false, 0, false, 0, folderToWriteData); + System.gc(); + + // Business - Words + W = readReviewData(folder, 10, true, false, 0.0, 0.0); + int bwNS = Util.getNegSampleSize(W); + completeEvaluation(folder, A, C, R, W, true, bwNS, false, 0, false, 0, false, 0, folderToWriteData); + System.gc(); + + // User - Words + W = readReviewData(folder, 10, false, true, 0.0, 0.0); + int uwNS = Util.getNegSampleSize(W); + completeEvaluation(folder, A, C, R, W, false, 0, true, uwNS, false, 0, false, 0, folderToWriteData); + System.gc(); + + // BusWords and UserWords + /*W = readReviewData(folder, 10, true, true, 0.0, 0.0); + getMemoryDetails(); + completeEvaluation(folder, A, C, R, W, true, bwNS, true, uwNS, false, 0, false, 0, folderToWriteData); + System.gc();*/ + } + + // To test one dataset completely and write embeddings for (A + R + C + W) + public static void main(String [] args) throws Exception { + String folder = args[0]; + String todo = args[1]; + todo = "heldOut"; + folder = "EDH"; + + if(todo.equals("heldOut")) + performHeldOutEvaluation(folder); + if(todo.equals("attBusCold")) + performAttBusColdEvaluation(folder); + if(todo.equals("rateBusCold")) + performRateBusColdEvaluation(folder); + if(todo.equals("rateUserCold")) + performRateUserColdEvaluation(folder); + //attBusColdEvaluations(folder); + //rateBusColdEvaluations(folder); + } + + // To make the sizes table + /*public static void main(String [] args) throws Exception { + + String folder = "WI"; + data A = readAttributes(folder, 15.0, 15.0, false, 0); + //data C = readCategories(folder, 5); + data R = readRatings(folder, 0.0, 0.0, false, 1); + //data BW = readReviewData(folder, 10, true, false, 0.0, 0.0); + //data UW = readReviewData(folder, 10, false, true, 0.0, 0.0); + + A.dataStats(); + + 
Util.getMatrixDetails(A);
+        Util.getMatrixDetails(C);
+        Util.getMatrixDetails(R);
+        //Util.getMatrixDetails(BW);
+
+
+
+    }*/
+
+}
\ No newline at end of file
diff --git a/Project/src/logisticCMF/data.java b/Project/src/logisticCMF/data.java
new file mode 100644
index 0000000..a26f514
--- /dev/null
+++ b/Project/src/logisticCMF/data.java
@@ -0,0 +1,601 @@
+package logisticCMF;
+import java.io.*;
+import java.util.*;
+
+import javax.json.Json;
+import javax.json.JsonNumber;
+import javax.json.JsonObject;
+import javax.json.JsonReader;
+import javax.json.JsonString;
+import javax.json.JsonValue;
+
+// Holds the cells of one or more relations, their train/validation/test split, the distinct
+// entity ids seen at positions 0 and 1, and the word bookkeeping built from review text.
+// NOTE(review): generic type arguments are missing throughout this copy of the file, which
+// is why declarations such as "Map> busWord" look malformed.
+public class data {
+
+    public ArrayList trainData;
+    public ArrayList testData;
+    public ArrayList valData;
+    // Every cell that was read, whichever split it later ends up in.
+    public ArrayList Data;
+    public Map relDataCount; // Map [Relation Id, Count]
+    // entityIds.get(0) and entityIds.get(1): distinct ids seen at cell positions 0 and 1.
+    ArrayList> entityIds;
+
+    public Map wordCount = new HashMap(); // Map [Words -> Count] - In original review Data
+    public Map> busWord = new HashMap>(); // Map [restaurant -> [words used for it]] - Can be pruned according to word occurrence threshold
+    public Map> userWord = new HashMap>(); // Map [user -> [words used for it]] - Can be pruned according to word occurrence threshold
+    public ArrayList words = new ArrayList(); // Set [words] - The vocab for our review data. Can be pruned according to word occurrence threshold
+    public static Random seed = new Random(50); // Also defined in Embedding & Learner Class
+
+
+    // Creates an empty data set: no cells, empty word maps and two empty entity-id sets.
+    public data(){
+        trainData = new ArrayList();
+        valData = new ArrayList();
+        testData = new ArrayList();
+        Data = new ArrayList();
+        relDataCount = new HashMap();
+        wordCount = new HashMap();
+        busWord = new HashMap>();
+        userWord = new HashMap>();
+        words = new ArrayList();
+        entityIds = new ArrayList>();
+        entityIds.add(new HashSet());
+        entityIds.add(new HashSet());
+    }
+
+    // Creates a data set with no cells that shares the word collections of D. busWord,
+    // userWord, words and wordCount are the same objects as in D, not copies, so a change
+    // made through either instance is visible in both.
+    public data(data D){
+        busWord = D.busWord;
+        userWord = D.userWord;
+        words = D.words;
+        wordCount = D.wordCount;
+        trainData = new ArrayList();
+        valData = new ArrayList();
+        testData = new ArrayList();
+        Data = new ArrayList();
+        relDataCount = new HashMap();
+        entityIds = new ArrayList>();
+        entityIds.add(new HashSet());
+        entityIds.add(new HashSet());
+    }
+
+    // Records the two entity ids of `cell` in the per-position id sets.
+    public void addToEntitySets(Cell cell){
+        entityIds.get(0).add(cell.entity_ids.get(0));
+        entityIds.get(1).add(cell.entity_ids.get(1));
+    }
+
+    // Reads "key : value" lines. A "business_id" line sets the current business; every other
+    // line with at least two ':'-separated parts becomes a (business, attribute) cell of
+    // relation "b-att-"+rel, true exactly when the value parses to 1.0. Lines with fewer
+    // than two parts are skipped.
+    // NOTE(review): the reader is closed only on the normal path. An IOException, or a
+    // NumberFormatException from a non-numeric value, leaves it open.
+    public void readBusAtt(String fileAddress, String rel) throws IOException{
+        BufferedReader br = new BufferedReader(new FileReader(fileAddress));
+        String relation = "b-att-"+rel;
+        String line;
+        String bId = "";
+        int count = 0;
+        while( (line = br.readLine()) != null ){
+            //System.out.println(line);
+            String [] arr = line.split(":");
+            if(arr.length >= 2){
+                if(arr[0].trim().equals("business_id"))
+                    bId = arr[1].trim();
+
+                else{
+                    String att = arr[0].trim();
+                    double t = Double.parseDouble(arr[1].trim());
+                    Cell cell = new Cell();
+                    cell.relation_id = relation;
+                    cell.entity_ids.add(bId);
+                    cell.entity_ids.add(att);
+                    cell.truth = (t == 1.0) ? true : false;
+                    addToEntitySets(cell);
+                    Data.add(cell);
+                    count++;
+                }
+
+
+            }
+        }
+        br.close();
+        System.out.println(relation + " : " + count);
+    }
+
+    // Prints every cell as "relation, id, id, truth", each on a new line.
+    public void printData(){
+        for(Cell cell : Data){
+            System.out.print("\n"+cell.relation_id + ", ");
+            for(String e : cell.entity_ids)
+                System.out.print(e + ", ");
+            System.out.print(cell.truth);
+        }
+    }
+
+    // Writes each distinct position-0 entity id, one per line, to the file fileAddress+"1".
+    // The map is used only as a "seen" set; its values are never read.
+    // NOTE(review): the writer is not closed if a write throws.
+    public void makeRestaurantSet(String fileAddress) throws IOException{
+        BufferedWriter bw = new BufferedWriter(new FileWriter(fileAddress+"1"));
+        HashMap restaurants = new HashMap();
+        for(Cell cell : Data){
+            if(restaurants.containsKey(cell.entity_ids.get(0)))
+                continue;
+            else{
+                restaurants.put(cell.entity_ids.get(0), 1.0);
+                bw.write(cell.entity_ids.get(0) + "\n");
+            }
+        }
+        bw.close();
+    }
+
+    // Discards the current split; Data itself is left untouched.
+    public void refreshTrainTest(){
+        trainData = new ArrayList();
+        valData = new ArrayList();
+        testData = new ArrayList();
+    }
+
+    // Reads "bus_id", "user_id" and "star" lines into a business -> user -> stars map, then
+    // turns every entry into a (business, user) cell of relation "b-u-rate-"+rel that is
+    // true when the star value is 4 or more. A later "star" line for the same business and
+    // user replaces the earlier one.
+    // NOTE(review): array[1] is read without a length check, and a "star" line that comes
+    // before any "bus_id" line makes ratings.get(busid) return null. The reader is not
+    // closed when an exception is thrown. `countl` and `value` are never read.
+    public void readRatingData(String fileAddress, String rel) throws IOException{
+        Map> ratings = new HashMap>();
+        BufferedReader br = new BufferedReader(new FileReader(fileAddress));
+        String line; int countl = 0, countcell = 0;
+        String relation = "b-u-rate-"+rel;
+        String busid = null, userid = null; boolean value = false;
+        while((line = br.readLine()) != null){
+            countl++;
+            String[] array = line.split(":");
+            if( array[0].trim().equals("bus_id")){
+                busid = array[1].trim();
+                if(!ratings.containsKey(busid))
+                    ratings.put(busid, new HashMap());
+            }
+            if( array[0].trim().equals("user_id"))
+                userid = array[1].trim();
+            if( array[0].trim().equals("star")){
+                double t = Double.parseDouble(array[1].trim());
+                ratings.get(busid).put(userid, (int)t);
+            }
+        }
+        br.close();
+        for(String bus : ratings.keySet()){
+            for(String user : ratings.get(bus).keySet()){
+                int rate = ratings.get(bus).get(user);
+                Cell cell = new Cell();
+                cell.relation_id = relation;
+                cell.entity_ids.add(bus);
+                cell.entity_ids.add(user);
+                cell.truth = (rate >= 4) ? true : false;
+                Data.add(cell);
+                addToEntitySets(cell);
+                countcell++;
+            }
+        }
+        System.out.println("Ratings read : " + countcell);
+    }
+
+    // Prints the number of lines in the file.
+    // NOTE(review): the reader is never closed, and `relation`, `busid`, `userid` and
+    // `value` are never read.
+    public void countLines(String fileAddress) throws IOException{
+        BufferedReader br = new BufferedReader(new FileReader(fileAddress));
+        String line; String relation = "restaurant-user-word"; int count = 0;
+        String busid = null, userid = null; boolean value = false;
+        while((line = br.readLine()) != null){
+            count ++;
+        }
+        System.out.println(count);
+    }
+
+    // NOTE(review): text was lost on the `for` line below. The rest of this method and the
+    // header of the next one are gone. Call sites elsewhere in this patch name that method
+    // splitColdStart and pass it (valPerc, testPerc, index); the surviving body refers to
+    // the first of these as valPercentage.
+    public void splitTrainTestValidation(double valPercentage, double testPercentage){
+        Collections.shuffle(Data, seed);
+        for(int j = 0; j coldEntities = Util.getColdEntites(Data, index, testPerc);
+        int countTest = 0, i=0;
+        // Every cell that touches a cold entity goes to test; the rest go to train.
+        for(Cell cell : Data){
+            if(coldEntities.contains(cell.entity_ids.get(index))){
+                testData.add(cell);
+                countTest++;
+            }
+            else
+                trainData.add(cell);
+        }
+        // Validation cells are then moved out of the shuffled training list.
+        // NOTE(review): the count is a share of Data.size(), not of trainData.size(), so
+        // it.next() throws NoSuchElementException if it exceeds the training list.
+        Collections.shuffle(trainData, seed);
+        Iterator it = trainData.iterator();
+        int cellValidation = (int)((valPercentage/100)*Data.size());
+        System.out.println(cellValidation);
+        while(i < cellValidation){
+            Cell cell = it.next();
+            valData.add(cell);
+            it.remove();
+            i++;
+        }
+    }
+
+    // Appends all cells of d to Data and reshuffles. d.Data is shuffled in place as well,
+    // and the train/validation/test lists of both objects are left as they were.
+    public void addDatainExisting(data d){
+        Collections.shuffle(Data, seed); Collections.shuffle(d.Data, seed);
+        for(Cell cell : d.Data)
+            Data.add(cell);
+
+        Collections.shuffle(Data, seed);
+    }
+
+    // Appends Data and each split of every element of dataList to the matching list here,
+    // keeping the splits intact, then shuffles the four lists independently.
+    public void addDataAfterSplit(ArrayList dataList){
+        for(data d : dataList){
+            for(Cell cell : d.Data)
+                Data.add(cell);
+            for (Cell cell : d.trainData)
+                trainData.add(cell);
+            for(Cell cell : d.valData)
+                valData.add(cell);
+            for(Cell cell : d.testData)
+                testData.add(cell);
+        }
+
+        Collections.shuffle(Data, seed);
+        Collections.shuffle(trainData, seed);
+        Collections.shuffle(valData, seed);
+        Collections.shuffle(testData, seed);
+    }
+
+    // Used by dataStats() function
+    public void countTrueFalse(ArrayList data, Map relTrue, Map relFalse){
+        for(Cell cell : data){
+            int t = (cell.truth) ? 1 : 0; int f = (cell.truth) ? 
0 : 1; + if(t == 1){ + if(!relTrue.containsKey(cell.relation_id)) + relTrue.put(cell.relation_id, 1); + else + relTrue.put(cell.relation_id, relTrue.get(cell.relation_id) + 1); + } + if(f == 1){ + if(!relFalse.containsKey(cell.relation_id)) + relFalse.put(cell.relation_id, 1); + else + relFalse.put(cell.relation_id, relFalse.get(cell.relation_id) + 1); + } + } + } + + public void dataStats(){ + Map relTrue = new HashMap(); + Map relFalse = new HashMap(); + countTrueFalse(Data, relTrue, relFalse); + System.out.println("\nData Stats"); + for(String rel : relTrue.keySet()) + System.out.println(rel + " : " + "t : " + relTrue.get(rel) + " f : " + relFalse.get(rel)); + + + + relTrue.clear(); + relFalse.clear(); + countTrueFalse(trainData, relTrue, relFalse); + System.out.println("Train Data Stats"); + for(String rel : relTrue.keySet()) + System.out.println(rel + " : " + "t : " + relTrue.get(rel) + " f : " + relFalse.get(rel)); + + relTrue.clear(); + relFalse.clear(); + countTrueFalse(valData, relTrue, relFalse); + System.out.println("Validation Data Stats"); + for(String rel : relTrue.keySet()) + System.out.println(rel + " : " + "t : " + relTrue.get(rel) + " f : " + relFalse.get(rel)); + + relTrue.clear(); + relFalse.clear(); + countTrueFalse(testData, relTrue, relFalse); + System.out.println("Test Data Stats"); + for(String rel : relTrue.keySet()) + System.out.println(rel + " : " + "t : " + relTrue.get(rel) + " f : " + relFalse.get(rel)); + + } + + public void reviewDataStats(int entityId, int thresh, boolean removeEntities){ + Map users = new HashMap(); // Map[EntityID, Count in Set] + Set e1 = new HashSet(); + Set e2 = new HashSet(); + Set setUsers = new HashSet(); + int count = 0, max = -1, min = 1000000000; + for(Cell cell : Data){ + String user = cell.entity_ids.get(entityId); /// CHECK WHICH ENTITY MAP IS CREATED + e1.add(cell.entity_ids.get(0)); + e2.add(cell.entity_ids.get(1)); + setUsers.add(cell.entity_ids.get(entityId)); + if(!users.containsKey(user)){ + 
users.put(user, 1); + } + else{ + int revCount = users.get(user); + int newRevCount = revCount+1 ; + users.put(user, newRevCount); + } + } + + for(String user : users.keySet()){ + if(users.get(user) > max) + max = users.get(user); + if(users.get(user) < min) + min = users.get(user); + if(users.get(user) <= thresh){ + count++; + setUsers.remove(user); + //System.out.println(user + " : " + users.get(user)); + } + } + int iterates = 0; + if(removeEntities){ + for(Iterator itr = Data.iterator();itr.hasNext();){ + iterates++; + Cell cell = itr.next(); + if(!setUsers.contains(cell.entity_ids.get(entityId))){ + itr.remove(); + } + } + } + System.out.println("Total iterates = "+iterates); + System.out.println(e1.size() + " : " + e2.size() + " : " +users.keySet().size() + " : " + setUsers.size()); + System.out.println("count : " + count + " max = " + max + " min : " + min); + } + + public void readAndCompleteCategoryData(String fileAddress, int thresh, String rel) throws NumberFormatException, IOException{ + BufferedReader br = new BufferedReader(new FileReader(fileAddress)); + Map> resCat = new HashMap>(); + Map catCount = new HashMap(); + Set categories = new HashSet(); + String line; String relation = "b-cat-"+rel; + int count = 0; + while( (line = br.readLine()) != null) { + + String[] array = line.split(":"); + resCat.put(array[0].trim(), new ArrayList()); + + String [] cats = array[1].trim().split(";"); // Delimiter used in resCat file. 
Refer to YelpChallenge-yeldData.java + for(String cat : cats){ + String category = cat.trim(); + if(category.length()>1){ + categories.add(category); + resCat.get(array[0].trim()).add(category); + + // To build Map[Category, Count] + if(!catCount.containsKey(category)) + catCount.put(category, 1); + else{ + int cat_count = catCount.get(category); + cat_count++; + catCount.put(category, cat_count); + } + } + } + } + br.close(); + + + int cC = 0; + for(String c : catCount.keySet()){ + if(catCount.get(c) > thresh) + cC++; + } + //System.out.println("Categories after pruning : " + cC); + + for(String res : resCat.keySet()){ + int categoriesConsidered = 0; + for(String cat : categories){ + if(catCount.get(cat) > thresh){ + categoriesConsidered++; + Cell cell = new Cell(); + cell.relation_id = relation; + cell.entity_ids.add(res); + cell.entity_ids.add(cat); + if(resCat.get(res).contains(cat)) + cell.truth = true; + else + cell.truth = false; + count++; + Data.add(cell); + addToEntitySets(cell); + } + cC = categoriesConsidered; + } + } + System.out.println(relation + " : " + count + " categories read : " + cC); + } + + public void reduceDataSize(double perc){ + Collections.shuffle(Data, seed); + int iterations = 0; int size = Data.size(); int stopAt = (int)((perc/100.0)*size); + System.out.println(stopAt); + for(Iterator itr = Data.iterator();itr.hasNext();){ + iterations++; + if(iterations <= stopAt) + itr.next(); + else{ + itr.next(); + itr.remove(); + } + } + } + + public void readReviewData(String fileAddress) throws IOException{ + BufferedReader br = new BufferedReader(new FileReader(fileAddress)); + String line; String bId = null; String userId = null; int countl = 0, countR = 0; + + while((line = br.readLine()) != null){ + countl++; + String[] array = line.split(":"); + + if( array[0].trim().equals("user_id")){ + countR++; + userId = array[1].trim(); + if(!userWord.containsKey(userId)) + userWord.put(userId, new HashSet()); + } + + if( 
array[0].trim().equals("bus_id")){ + bId = array[1].trim(); + if(!busWord.containsKey(bId)) + busWord.put(bId, new HashSet()); + } + + if(array[0].trim().equals("text")){ + String [] tokens = array[1].trim().split(" "); + if(tokens.length > 0){ + for(String word : tokens){ + word = word.trim(); + if(word.length() >= 3){ + addWordInMap(word); + busWord.get(bId).add(word); + userWord.get(userId).add(word); + } + } + } + } + /*if(countl % 100000 == 0) + System.out.println("line : "+countl);*/ + } + System.out.println("Total No. of Reviews : " + countR); + } + + public void addWordInMap(String word){ + if(!wordCount.containsKey(word)) + wordCount.put(word, 0); + int wcount = wordCount.get(word); + wcount++; + wordCount.put(word, wcount); + } + + public void getMapStats(Map> enWord){ + int potentialResWordCells = 0; + int min=100000000; + System.out.println("No. of entities = " + enWord.keySet().size()); + System.out.println("No. of total words in Vocab = " + words.size()); + + for(String en : enWord.keySet()){ + min = (enWord.get(en).size() < min) ? enWord.get(en).size() : min; + for(String word : enWord.get(en)){ + potentialResWordCells++; + } + + } + System.out.println("Potential Entity-Word Cells : " + potentialResWordCells); + System.out.println("Words per entiy : " + ((double) potentialResWordCells)/enWord.keySet().size()); + System.out.println("Min No. of Words in Entity : " + min); + + } + + public void pruneVocab_EntityMap(int occThresh){ + makePrunedWordList(occThresh); + pruneEntityWordMap(busWord, occThresh); + pruneEntityWordMap(userWord, occThresh); + } + + // Remove words from Map[Entity -> Set[words]] that occur few times in dictionary. If Set of words for entity go empty, remove Entity from Map. 
+ public void pruneEntityWordMap(Map> enWord, int occThresh){ + Iterator it = enWord.keySet().iterator(); + while(it.hasNext()){ + String en = it.next(); + Iterator itr = enWord.get(en).iterator(); + while(itr.hasNext()){ + String word = itr.next(); + if(wordCount.get(word) <= occThresh) + itr.remove(); + } + if(enWord.get(en).size() == 0) + it.remove(); + } + } + + public void getWordCountStats(int start, int end){ + int count = 0; + for(int i = start; i<=end; i++){ + for(String word : wordCount.keySet()){ + if(wordCount.get(word) > i) + count++; + } + System.out.println("Words with greater that " + i + " count : " + count); + count = 0; + } + } + + // Make a Array of Words that have frequency above the given threshold. + public void makePrunedWordList(int occThresh){ + words = new ArrayList(); int count = 0; + for(String word : wordCount.keySet()){ + if(wordCount.get(word) > occThresh){ + count++; + words.add(word); + } + } + System.out.println("Words with greater than occurence of " + occThresh + " : " + words.size()); + } + + public void makeEnWordCells(String relation){ + if(relation.equals("b-word")){ + enWordCells(busWord, relation); + } + else if(relation.equals("u-word")){ + enWordCells(userWord, relation); + } + } + + public void enWordCells(Map> enWord, String relation){ + for(String en : enWord.keySet()){ + for(String word : enWord.get(en)){ + Cell cell = new Cell(); + cell.relation_id = relation; + cell.entity_ids.add(en); + cell.entity_ids.add(word); + cell.truth = true; + Data.add(cell); + addToEntitySets(cell); + } + } + } + + public ArrayList getNegativeSamples(String relation, int negSamplesPerEntity){ + ArrayList negSamples = new ArrayList(); + int negSamplesDone = 0; + if(relation.equals("b-word")){ + for(String en : busWord.keySet()){ + while(negSamplesDone < negSamplesPerEntity){ + Cell cell = genNegSample(busWord, en, relation); + negSamplesDone++; + negSamples.add(cell); + } + negSamplesDone = 0; + } + + } + else 
if(relation.equals("u-word")){ + for(String en : userWord.keySet()){ + while(negSamplesDone < negSamplesPerEntity){ + Cell cell = genNegSample(userWord, en, relation); + negSamplesDone++; + negSamples.add(cell); + } + negSamplesDone = 0; + } + } + return negSamples; + } + + public Cell genNegSample(Map> enWord, String en, String relation){ + Cell cell = new Cell(); + cell.relation_id = relation; + boolean found = false; + while(!found){ + int pos = randInt(0, words.size() - 1); + if(!enWord.get(en).contains(words.get(pos))){ + cell.entity_ids.add(en); + cell.entity_ids.add(words.get(pos)); + cell.truth = false; + found = true; + } + } + return cell; + } + + public static int randInt(int min, int max) { + // nextInt is normally exclusive of the top value, + // so add 1 to make it inclusive + int randomNum = seed.nextInt((max - min) + 1) + min; + + return randomNum; + } + +} diff --git a/Project/src/logisticCMF/embedding.java b/Project/src/logisticCMF/embedding.java new file mode 100644 index 0000000..91f38b5 --- /dev/null +++ b/Project/src/logisticCMF/embedding.java @@ -0,0 +1,29 @@ +package logisticCMF; +import java.util.*; + +/* + * Embedding Class - Stores embedding for each entity which includes : + * Bias for each relation the entity belongs to + * K- Dimensional Latent Vector for each entity +*/ + +public class embedding { + Map bias; // Map : [Relation_id, bias] + double[] vector; // K-Dimensional Latent Vector + static Random rand = new Random(20); + + // For entity realized first time. 
Put Bias = 0.0 for relation and initialize latent vector to random + public embedding(int K){ + bias = new HashMap(); + vector = new double[K]; + for(int k=0; k embs; // Map[Entity_Id, Embedding] + Map alpha; // Map [Relation_Id, Alpha (matrix mean) ] + HashSet relIds; // Set of relation Ids + HashSet entityIds; // Set of Entity Ids + int K ; + static Random rand = new Random(); + + // Instantiating Embeddings for Data + public embeddings(data YData, int lK){ + K = lK; + alpha = new HashMap(); + embs = new HashMap(); + for(Cell cell : YData.Data){ + for(String entityId : cell.entity_ids){ + if(!embs.containsKey(entityId)) + embs.put(entityId, new embedding(K)); + + embs.get(entityId).addRelation(cell.relation_id); + } + } + System.out.println("Unique Entites in Database : " + embs.keySet().size()); + computeAlpha(YData); + } + + public embeddings(embeddings e){ + this.alpha = e.alpha; + this.embs = e.embs; + this.relIds = e.relIds; + this.entityIds = e.entityIds; + this.K = e.K; + } + + //Compute Alpha - Relation wise mean of values + public void computeAlpha(data D){ + Map relSum = new HashMap(); // Sum of truth values in each relation + Map relCount = new HashMap(); // Count of truth values in each relation + + for(Cell cell : D.trainData){ + if(!relSum.containsKey(cell.relation_id)){ + if(cell.truth == true) + relSum.put(cell.relation_id, 1); + else + relSum.put(cell.relation_id, 0); + relCount.put(cell.relation_id, 1); + } + else{ + int sum = relSum.get(cell.relation_id); + int count = relCount.get(cell.relation_id); + if(cell.truth == true){ + sum += 1; + } + count++; + relSum.put(cell.relation_id, sum); + relCount.put(cell.relation_id, count); + } + + } + + for(String relId : relSum.keySet()){ + double a = ((double)relSum.get(relId))/relCount.get(relId); + a = Math.log((a / (1-a))); + //alpha.put(relId, a); + alpha.put(relId, rand.nextGaussian()*0.001); + } + + + } + + + // Dot Product of vectors in cell, but leave out one entity, For fixed k + public double 
coeffVector(Cell cell, String leaveEntity, int k){ + double result=1.0; + for(String entityId : cell.entity_ids){ + if(!entityId.equals(leaveEntity)) + result *= embs.get(entityId).vector[k]; + } + return result; + } + + // Dot Product of vectors in cell, but leave out one entity, For fixed k + public double dot(Cell cell, boolean enableBias, int K, boolean ealpha, boolean onlyAlpha){ + double result=0.0; + + if(ealpha) + result += alpha.get(cell.relation_id); + if(!onlyAlpha){ + if(enableBias){ + for(String entityId : cell.entity_ids){ + if(!embs.containsKey(entityId)) + System.out.println("Entity not found : " + entityId); + if(!embs.get(entityId).bias.containsKey(cell.relation_id)) + System.out.println("Relation Not found : " + cell.relation_id); + result += embs.get(entityId).bias.get(cell.relation_id); + + + } + } + + for(int k = 0; k trainData = new ArrayList(); + embeddings eBest = new embeddings(embedings); + + Map> evalMap = Eval.getEvalMap(Data, eBest, "test"); + Eval.printEval(); + while(notConverged){ + + trainData.clear(); + trainData.addAll(Data.trainData); + System.out.println("Train Data Original : " + trainData.size()); + if(busWord){ + ArrayList negSamples = Data.getNegativeSamples("b-word", busWordNegSamSize); + trainData.addAll(negSamples); + } + if(userWord){ + ArrayList negSamples = Data.getNegativeSamples("u-word", userWordNegSamSize); + trainData.addAll(negSamples); + } + System.out.println("trainData : " + trainData.size()); + codeTest.getMemoryDetails(); + System.gc(); + Collections.shuffle(trainData, seed); // Shuffle List of Training Data before each iteration of learning parameters + for(Cell cell : trainData){ + update(cell, enableBias, embedings, ealpha, onlyAlpha); + } + //System.out.println("Train Data size :" + trainData.size()); + epoch++; + System.out.print(epoch + " "); + System.gc(); + if(epoch%5 == 0){ + System.out.println("################## Epoch : " + epoch + " ############"); + evalMap = Eval.getEvalMap(Data, embedings, 
"validation"); + Eval.printEval(); + double wf1 = evalMap.get("average").get(3), wacc = evalMap.get("average").get(0); + if(epoch == 5){ + //maxF1 = wf1; maxAcc = wacc; + maxF1 = 0.0; maxAcc = 0.0; + dropfor = 0; nochange = 0; + bestEpoch = epoch; + eBest = new embeddings(embedings); + } + else{ + //System.out.println("dF : " + dropfor + " nc : " + nochange + " maxF1 : " + maxF1 + " maxAcc : " + maxAcc); + if(wf1 > maxF1 || wacc > maxAcc){ + bestEpoch = epoch; + eBest = new embeddings(embedings); + maxF1 = wf1; + maxAcc = wacc; + dropfor = 0; nochange = 0; + } + else{ + if(wf1 == maxF1 || wacc == maxAcc) + nochange++; + else + dropfor++; + } + } + + } + if(dropfor >= 3 || nochange >= 4) /// CONDITIONS for STOPPING + notConverged = false; + } + System.out.println("TRAINING CONVEREGED, BEST EPOCH = " + bestEpoch); + return eBest; + } + + +} diff --git a/Project/src/logisticCMF/writeDataToFile.java b/Project/src/logisticCMF/writeDataToFile.java new file mode 100644 index 0000000..158173a --- /dev/null +++ b/Project/src/logisticCMF/writeDataToFile.java @@ -0,0 +1,33 @@ +package logisticCMF; + +import java.util.*; +import java.io.*; + +public class writeDataToFile { + public static void writePrediction(String fileName, String folder, data Data, embeddings e) throws IOException{ + String fileAddress = System.getProperty("user.dir")+"/../Embeddings_Prediction_Data/"+ folder +"/pred-data/" + fileName; + BufferedWriter bw = new BufferedWriter(new FileWriter(fileAddress)); + for(Cell cell : Data.testData){ + double dot = e.dot(cell, learner.enableBias, e.K, learner.ealpha, learner.onlyAlpha); + double sigmdot = learner.sigm(dot); + int truth = (cell.truth) ? 
1 : 0; + String e1 = cell.entity_ids.get(0), e2 = cell.entity_ids.get(1); + bw.write(e1 + " :: " + e2 + " :: " + sigmdot + " :: " + truth +"\n") ; + } + bw.close(); + } + + public static void writeEmbeddings(String folder, String fileName, data Data, int entityNumber, embeddings e) throws IOException{ + String fileAddress = System.getProperty("user.dir")+"/../Embeddings_Prediction_Data/"+ folder +"/embeddings/" + fileName; + BufferedWriter bw = new BufferedWriter(new FileWriter(fileAddress)); + for(String entity : Data.entityIds.get(entityNumber)){ + bw.write(entity + " :: "); + embedding em = e.embs.get(entity); + for(int i=0; i entityVector; + + public EntityEmbeddings(String folder, String fileName, int K) throws IOException{ + entityVector = new HashMap(); + readEmbeddings(folder, fileName, K); + } + + public void readEmbeddings(String folder, String fileName, int K) throws IOException{ + String fileAddress = System.getProperty("user.dir")+"/../Embeddings_Prediction_Data/"+ folder +"/embeddings/" + fileName; + BufferedReader br = new BufferedReader(new FileReader(fileAddress)); + String line; + while((line = br.readLine()) != null){ + String[] array = line.split("::"); + + // Reading entity from embeddings file and creating + String entity = array[0].trim(); + if(!entityVector.containsKey(entity)) + entityVector.put(entity, new double[K]); + else + System.out.println("Duplicate Entity. 
ERROR !!!!!!"); + + // Reading latent vector and storing in Map + String[] vec = array[1].trim().split(","); + for(int i=0; i> KNN(EntityEmbeddings a, EntityEmbeddings b, int K){ + Map> kNN = new HashMap>(); + for(String en1 : a.entityVector.keySet()){ + double [] vec1 = a.entityVector.get(en1); + Map distances = new HashMap(); + for(String en2 : b.entityVector.keySet()){ + double[] vec2 = b.entityVector.get(en2); + distances.put(en2, Util.dotProd(vec1, vec2)); + } + + kNN.put(en1, Util.getKNN(distances, K)); + } + + return kNN; + } + + public Map> getSimilarity(EntityEmbeddings ee1, EntityEmbeddings ee2){ + Map> simMap = KNN(ee1, ee2, 20); + + for(String en : simMap.keySet()){ + System.out.print(en + " : "); + for(String nn : simMap.get(en)){ + System.out.print(nn + ", "); + } + System.out.println(); + } + + return simMap; + } + + + public static void main(String [] args) throws IOException { + String folder = "AZ"; + String evaluation = "HeldOut"; + System.out.println("Start"); + + EntityEmbeddings attributes = new EntityEmbeddings(folder, evaluation+"/"+"attributes-bw", 30); + EntityEmbeddings words = new EntityEmbeddings(folder, evaluation+"/"+"words-bw", 30); + EntityEmbeddings categories = new EntityEmbeddings(folder, evaluation+"/"+"categories-bw", 30); + EntityEmbeddings business = new EntityEmbeddings(folder, evaluation+"/"+"business-bw", 30); + + Similarity s = new Similarity(); + s.getSimilarity(categories, words); + + + + + } + +} diff --git a/Project/src/postProcessing/Util.java b/Project/src/postProcessing/Util.java new file mode 100644 index 0000000..5ce7190 --- /dev/null +++ b/Project/src/postProcessing/Util.java @@ -0,0 +1,62 @@ +package postProcessing; +import java.util.*; +import java.util.Map.Entry; + +public class Util { + + public static double sigm(double x){ + return 1.0 / (1.0 + Math.exp(-x)); + } + + public static double norm(double [] vec){ + double norm = 0.0; + for(int i=0; i getKNN(Map map, int K){ + ArrayList top = new ArrayList(); + 
Set> set = map.entrySet(); + List> list = new ArrayList>(set); + Collections.sort( list, new Comparator>() + { + public int compare( Map.Entry o1, Map.Entry o2 ) + { + return (o2.getValue()).compareTo( o1.getValue() ); + } + } ); + + for(int i=0; i entry : list) + System.out.println(entry.getKey() + " ==== " + entry.getValue());*/ + } + + public static void writeSelectEmbeddingsToFile(String folder, String selectionEmbeddings, Map> simMap){ + + } + +} diff --git a/Project/src/yelpDataProcessing/AttributeCategory.java b/Project/src/yelpDataProcessing/AttributeCategory.java new file mode 100644 index 0000000..25147de --- /dev/null +++ b/Project/src/yelpDataProcessing/AttributeCategory.java @@ -0,0 +1,311 @@ +package yelpDataProcessing; + +import java.util.*; +import java.io.*; + +import javax.json.*; +import javax.json.spi.*; + +/* + * Reads the yelp_dataset_restaurant json file and creates + * - resAtt.txt - The restaurant-attribute data that can directly be factorized + * - resCat.txt - The retaurant-category data that can be factorized but needs negative data + */ + + +public class AttributeCategory { + + Map> attributes; // Map [Attribute, SubAttribute] + Set categories; + Map catCount; // Map [Category, Occurrence Count] + Map busCatCount; // Map [Restaurant, No. 
of Category Count] + Set catReduced; // Set [Categories] - Thresholded on terms of occurrence + Map> busCat; // Map [Res, List[Categories]] + + + public void printCategories() { + for(Object ob : categories){ + System.out.println(ob); + } + } + + public void printAttributes() { + for(String att : attributes.keySet()){ + System.out.print(att + " : "); + for(String subatt : attributes.get(att).keySet()) + System.out.print(subatt+", "); + System.out.print("\n"); + } + System.out.println("Total Attributes : " + attributes.keySet().size()); + } + + private void buildCategorySet(String folder) throws IOException { + BufferedReader br = new BufferedReader(new FileReader(System.getProperty("user.dir")+"/../Dataset/json/"+folder+"/business")); + categories = new HashSet(); + catCount = new HashMap(); + busCatCount = new HashMap(); + busCat = new HashMap>(); + + String line; + int count = 0; + while(( (line = br.readLine()) != null) ){ + JsonReader reader = Json.createReader(new StringReader(line)); + JsonObject object = reader.readObject(); + if(object.get("type").toString().equals("\"business\"")){ + if(object.get("categories").getValueType().toString().equals("ARRAY")){ + JsonArray cat = (JsonArray) object.get("categories"); + JsonValue b_id = object.get("business_id"); + JsonString bus_id = (JsonString) b_id; + busCatCount.put(bus_id.getString(), cat.size()); + busCat.put(bus_id.getString(), new ArrayList()); + for(JsonValue s : cat){ + JsonString c = (JsonString) s; + String category = c.getString(); + categories.add(category); + busCat.get(bus_id.getString()).add(category); + if(!catCount.containsKey(category)) + catCount.put(category, 1); + else + catCount.put(category, catCount.get(category)+1 ); + } + } + count++; + } + } + //return categories; + System.out.println("Businesses Read : " + count); + } + + private void buildThresholdCatSet(int thresh){ + catReduced = new HashSet(); + for(String cat : catCount.keySet()){ + if(catCount.get(cat) >= thresh){ + 
catReduced.add(cat); + } + } + } + + /* Reads the Yelp JSON Dataset and makes attributes hashmap */ + private void readAttributes(String folder) throws IOException{ + BufferedReader br = new BufferedReader(new FileReader(System.getProperty("user.dir")+"/../Dataset/json/"+folder+"/business")); + attributes = new HashMap>(); + String line; + while(( (line = br.readLine()) != null) ){ + JsonReader reader = Json.createReader(new StringReader(line)); + JsonObject object = reader.readObject(); + //if(object.containsKey("attributes")){ + if(object.get("type").toString().equals("\"business\"")){ + if(object.get("attributes").getValueType().toString() == "OBJECT"){ + JsonObject attributeObject = (JsonObject) object.getJsonObject("attributes"); + for(String key : attributeObject.keySet()){ + if(!attributes.containsKey(key)){ // To Make Attributes Keys Set + attributes.put(key, new HashMap()); + getValueSet(attributeObject.get(key), key, attributes); + } + else + getValueSet(attributeObject.get(key), key, attributes); + } + } + } + } + br.close(); + } + + /* Creates a dataset with business Id and values for attributes and stores them in a file. Uses the attributes hashmap. 
*/ + private void buildBusiness_AttributeDataset(String folder) throws IOException{ + BufferedReader br = new BufferedReader(new FileReader(System.getProperty("user.dir")+"/../Dataset/json/"+folder+"/business")); + BufferedWriter bw = new BufferedWriter(new FileWriter(System.getProperty("user.dir")+"/../Dataset/data/" + folder + "/busAtt.txt")); + String line; + int count = 0; + + while(( (line = br.readLine()) != null) && count < 42151 ){ + StringBuilder str = new StringBuilder(); + JsonReader reader = Json.createReader(new StringReader(line)); + JsonObject object = reader.readObject(); + JsonValue b_id = object.get("business_id"); + JsonString bus_id = (JsonString) b_id; + bw.write("business_id : "+bus_id.getString()+"\n"); + + if(object.get("attributes").getValueType().toString() == "OBJECT"){ + JsonObject attributeObject = (JsonObject) object.getJsonObject("attributes"); + navigateObjectForValues(attributeObject, null, null, str); + //System.out.println(str.toString()); + bw.write(str.toString()+"\n\n"); + + } + count++; + } + System.out.println("No. 
of Business : "+count); + br.close(); + bw.close(); + } + + /* */ + public void navigateObjectForValues(JsonValue tree, String key, String prevKey, StringBuilder str) { + switch(tree.getValueType()) { + case OBJECT: + //System.out.println("OBJECT"); + JsonObject object = (JsonObject) tree; + for (String name : object.keySet()) + navigateObjectForValues(object.get(name), name, key, str); + break; + case ARRAY: + break; + case STRING: + JsonString st = (JsonString) tree; + if (key!= null){ + if(prevKey != null){ + //System.out.println(prevKey+"_" + key + "_"+st.getString()+" : "+1); + str.append(prevKey+"_" + key + "_"+st.getString()+" : "+1 + "\n"); + if(attributes.get(key).keySet().size() != 0) + for(String s: attributes.get(key).keySet()) + if(!s.equals(st.getString())) + //System.out.println(prevKey+"_" + key + "_"+s+" : "+0); + str.append(prevKey+"_" + key + "_"+s+" : "+0+"\n"); + } + else{ + //System.out.println(key + "_"+st.getString()+" : "+1); + str.append(key + "_"+st.getString()+" : "+1+"\n"); + + if(attributes.get(key).keySet().size() != 0) + for(String s: attributes.get(key).keySet()) + if(!s.equals(st.getString())) + //System.out.println(key + "_"+s+" : "+0); + str.append(key + "_"+s+" : "+0+"\n"); + } + } + + + break; + case NUMBER: + /*if (key!= null) + if(prevKey != null) + System.out.print(prevKey+"_" + key + " : "); + else + System.out.print(key + " : "); + JsonNumber num = (JsonNumber) tree; + System.out.println(num.toString()); + */ + break; + case TRUE: + if (key!= null) + if(prevKey != null) + //System.out.print(prevKey+"_" + key + " : "); + str.append(prevKey+"_" + key + " : "+1+"\n"); + else +// //System.out.print(key + " : " + 1); + str.append(key + " : " + 1+"\n"); + //System.out.println(1); + break; + case FALSE: + case NULL: + if (key!= null) + if(prevKey != null) + //System.out.print(prevKey+"_" + key + " : "); + str.append(prevKey+"_"+key + " : " + 0+"\n"); + else + //System.out.print(key + " : "); + str.append(key + " : " + 0+"\n"); + 
//System.out.println(0); + break; + } + } + + private void getValueSet(JsonValue attribute, String attributeName, Map> attributes){ + switch(attribute.getValueType()){ + + case STRING: + JsonString st = (JsonString) attribute; + if(!attributes.get(attributeName).containsKey(st)) + attributes.get(attributeName).put(st.getString(), 1); + break; + + case NUMBER: + if(!attributes.get(attributeName).containsKey("NUMBER")) + attributes.get(attributeName).put("NUMBER", 1); + break; + case TRUE: + if(!attributes.get(attributeName).containsKey("TRUE")) + attributes.get(attributeName).put("TRUE", 1); + break; + case FALSE: + if(!attributes.get(attributeName).containsKey("FALSE")) + attributes.get(attributeName).put("FALSE", 1); + break; + case OBJECT: + JsonObject att = (JsonObject) attribute; + for(String subAttributeName : att.keySet()) + if(!attributes.get(attributeName).containsKey(subAttributeName)) + attributes.get(attributeName).put(subAttributeName, 1); + break; + } + } + + // Writes Restaurant : Category1, Category2, ... 
- to file resCat.txt + private void writeResCatToFile(String folder) throws IOException{ + BufferedWriter bw = new BufferedWriter(new FileWriter(System.getProperty("user.dir")+"/../Dataset/data/" + folder +"/busCat.txt")); + for(String res : busCat.keySet()){ + bw.write(res + " : "); + for(String cat : busCat.get(res)) + bw.write(cat + "; "); + bw.write("\n"); + } + bw.close(); + } + + private void makeCitySet(String dataset) throws IOException { + BufferedReader br = new BufferedReader(new FileReader(System.getProperty("user.dir")+"/../Dataset/json"+dataset)); + String line; int count = 0; + Map states = new HashMap(); + while(( (line = br.readLine()) != null) && count < 42151){ + JsonReader reader = Json.createReader(new StringReader(line)); + JsonObject object = reader.readObject(); + + JsonString city = (JsonString) object.get("state"); + String ci = city.getString(); + if(!states.containsKey(ci)) + states.put(ci, 0); + int sc = states.get(ci); + sc++; + states.put(ci, sc); + + count++; + } + + for(String state : states.keySet()){ + System.out.println(state + " : " + states.get(state)); + } + System.out.println(states.keySet().size()); + } + + + public static void main(String[] args) throws Exception{ + System.out.println("Hello"); + String State = "NV"; + + AttributeCategory data = new AttributeCategory(); + + + // Read file for attributes and write to a file + data.readAttributes(State); + //data.printAttributes(); + //data.buildBusiness_AttributeDataset(State); + + + data.buildCategorySet(State); + data.printCategories(); + //data.writeResCatToFile(State); + + int c = 0; + + + //data.makeCitySet("yelp_dataset"); + + + + + + } + + +} diff --git a/Project/src/yelpDataProcessing/ProcessYelpJson.java b/Project/src/yelpDataProcessing/ProcessYelpJson.java new file mode 100644 index 0000000..94f52ad --- /dev/null +++ b/Project/src/yelpDataProcessing/ProcessYelpJson.java @@ -0,0 +1,220 @@ +package yelpDataProcessing; + +import java.io.*; +import java.util.*; + 
+import javax.json.Json; +import javax.json.JsonArray; +import javax.json.JsonNumber; +import javax.json.JsonObject; +import javax.json.JsonReader; +import javax.json.JsonString; +import javax.json.JsonValue; +import javax.json.stream.JsonParsingException; + + +/* + * Processes Yelp Dataset Json and creates different files + * - yelp_business - Business dataset in json format. + * - yelp_dataset_retaurant - Restaurant Json data from Yelp + * - + * + * + */ + +public class ProcessYelpJson { + + Set busIds = new HashSet(); + + // Create file - yelp_reviews - in dataset/json + public void createCompleteReviewJson(String yelpDataset) throws IOException{ + BufferedReader br = new BufferedReader(new FileReader(System.getProperty("user.dir")+"/../Dataset/json/"+yelpDataset)); + BufferedWriter bw = new BufferedWriter(new FileWriter(System.getProperty("user.dir")+"/../Dataset/json/complete/reviews")); + String line; + int count = 0; int cr = 0; + while( ((line = br.readLine()) != null) && count < 1199227){ + if(count <= 73770) + count++; + else{ + cr++; + bw.write(line+"\n"); + count++; + } + } + bw.close(); + br.close(); + System.out.println("Reviews : " + cr); + + + } + + // Creates file - yelp_business - in dataset/json folder + public void createCompleteBusinessJson(String yelpDataset) throws IOException{ + BufferedReader br = new BufferedReader(new FileReader(System.getProperty("user.dir")+"/../Dataset/json/"+yelpDataset)); + BufferedWriter bw = new BufferedWriter(new FileWriter(System.getProperty("user.dir")+"/../Dataset/json/complete/business")); + String line; int count = 0; + + while(( (line = br.readLine()) != null) && count < 42151 ){ + bw.write(line + "\n"); + count++; + } + bw.close(); + System.out.println("No. 
of Businesses : " + count); + + } + + public void createStateBusinessJson(String folder, String state) throws IOException{ + BufferedReader br = new BufferedReader(new FileReader(System.getProperty("user.dir")+"/../Dataset/json/"+folder+"/business")); + BufferedWriter bw = new BufferedWriter(new FileWriter(System.getProperty("user.dir")+"/../Dataset/json/"+state+"/business")); + + String line; + int count = 0; int cr = 0; + while(( (line = br.readLine()) != null) ){ + JsonReader reader = Json.createReader(new StringReader(line)); + JsonObject object = reader.readObject(); + if(object.get("type").toString().equals("\"business\"")){ + JsonValue s = object.get("state"); + JsonString st = (JsonString) s; + String place = st.getString(); + if(place.equals(state)){ + bw.write(line + "\n"); + cr++; + } + } + } + bw.close(); + System.out.println("No. of Businesses in "+state + " : " + cr); + } + + + // Creates file - yelp_dataset_restaurant - in dataset/json folder + public void createRestaurantJson(String folder) throws IOException{ + BufferedReader br = new BufferedReader(new FileReader(System.getProperty("user.dir")+"/../Dataset/json/"+folder+"/business")); + BufferedWriter bw = new BufferedWriter(new FileWriter(System.getProperty("user.dir")+"/../Dataset/json/Restaurant/business")); + + String line; + int count = 0; int cr = 0; + while(( (line = br.readLine()) != null) ){ + JsonReader reader = Json.createReader(new StringReader(line)); + JsonObject object = reader.readObject(); + if(object.get("type").toString().equals("\"business\"")){ + if(object.get("categories").getValueType().toString().equals("ARRAY")){ + JsonArray cat = (JsonArray) object.get("categories"); + for(JsonValue s : cat){ + JsonString c = (JsonString) s; + String category = c.getString(); + if(category.equals("Restaurants")){ + bw.write(line + "\n"); + cr ++; + } + } + } + } + } + bw.close(); + System.out.println("No. 
of Restaurants : " + cr); + } + + + // Create file - yelp_reviews_restaurant - in dataset/json + public void createBusReviewJson(String folder_complete, String folder) throws IOException{ + makeBusIdSet(folder); + BufferedReader br = new BufferedReader(new FileReader(System.getProperty("user.dir")+"/../Dataset/json/"+folder_complete+"/reviews")); + BufferedWriter bw = new BufferedWriter(new FileWriter(System.getProperty("user.dir")+"/../Dataset/json/"+folder+"/reviews")); + int count = 0; + String line; + while(( (line = br.readLine()) != null) ){ + JsonReader reader = Json.createReader(new StringReader(line)); + JsonObject object = reader.readObject(); + if(object.get("type").toString().equals("\"review\"")){ + JsonValue b_id = object.get("business_id"); + JsonString bus_id = (JsonString) b_id; + String bid = bus_id.getString(); + if(busIds.contains(bid)){ + bw.write(line+"\n"); + count++; + } + } + } + bw.close(); + System.out.println("Reviews Count : " + count); + + } + + // Make a Set of Res-Ids to extract reviews + public void makeBusIdSet(String folder) throws IOException{ + busIds = new HashSet(); + BufferedReader br = new BufferedReader(new FileReader(System.getProperty("user.dir")+"/../Dataset/json/"+folder+"/business")); + String line; + while(( (line = br.readLine()) != null)){ + JsonReader reader = Json.createReader(new StringReader(line)); + JsonObject object = reader.readObject(); + if(object.get("type").toString().equals("\"business\"")){ + JsonValue b_id = object.get("business_id"); + JsonString bus_id = (JsonString) b_id; + String bid = bus_id.getString(); + busIds.add(bid); + } + } + System.out.println("Size of resIds set :" + busIds.size()); + } + + public static void putReviewDatatoFile(String folder) throws IOException{ + BufferedReader br = new BufferedReader(new FileReader(System.getProperty("user.dir")+"/../Dataset/json/"+folder+"/reviews")); + BufferedWriter bw = new BufferedWriter(new FileWriter(System.getProperty("user.dir") + 
"/../Dataset/data/"+folder+"/reviews.txt")); + String line; + int count = 0; + while((line = br.readLine()) != null){ + JsonReader reader = Json.createReader(new StringReader(line)); + JsonObject object = reader.readObject(); + + JsonValue b_id = object.get("business_id"); + JsonString bus_id = (JsonString) b_id; + String bId = bus_id.getString(); + bw.write("bus_id : " + bId + "\n"); + + JsonValue u_id = object.get("user_id"); + JsonString user_id = (JsonString) u_id; + String uId = user_id.getString(); + bw.write("user_id : " + uId + "\n"); + + + JsonValue star = object.get("stars"); + JsonNumber s = (JsonNumber) star; + Double st = s.doubleValue(); + bw.write("star : " + st + "\n"); + + JsonValue t = object.get("text"); + JsonString te = (JsonString) t; + String text = te.getString(); + bw.write("text: " + t + "\n\n"); + + count++; + } + br.close(); + bw.close(); + System.out.println("Reviews Written : " + count); + } + + public static void main(String [] args) throws Exception{ + String yelpDataset = "yelp_dataset"; + String State = "NV"; + + ProcessYelpJson yelp = new ProcessYelpJson(); + + + //yelp.createCompleteBusinessJson(yelpDataset); + + //yelp.createRestaurantJson("complete"); + yelp.createStateBusinessJson("complete", State); + + //yelp.createCompleteReviewJson(yelpDataset); + + yelp.createBusReviewJson("complete", State); + + yelp.putReviewDatatoFile(State); + + + + } +} diff --git a/Project/src/yelpDataProcessing/reviewData.java b/Project/src/yelpDataProcessing/reviewData.java new file mode 100644 index 0000000..f9ac3e0 --- /dev/null +++ b/Project/src/yelpDataProcessing/reviewData.java @@ -0,0 +1,141 @@ +package yelpDataProcessing; +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; +import java.util.*; + +import logisticCMF.Cell; + + +public class reviewData { + + public static Map wordCount = new HashMap(); + public static Map> resWord = new HashMap>(); + public static Map> userWord = new HashMap>(); + public 
static HashSet words = new HashSet(); + + public static void addWordInMap(String word){ + if(!wordCount.containsKey(word)) + wordCount.put(word, 0); + int wcount = wordCount.get(word); + wcount++; + wordCount.put(word, wcount); + } + + public static void readData(String reviewData) throws IOException{ + BufferedReader br = new BufferedReader(new FileReader(reviewData)); + String line; String bId = null; String userId = null; int countl = 0, countR = 0; + + while((line = br.readLine()) != null){ + countl++; + String[] array = line.split(":"); + + if( array[0].trim().equals("user_id")){ + countR++; + userId = array[1].trim(); + if(!userWord.containsKey(userId)) + userWord.put(userId, new HashSet()); + } + + if( array[0].trim().equals("bus_id")){ + bId = array[1].trim(); + if(!resWord.containsKey(bId)) + resWord.put(bId, new HashSet()); + } + + if(array[0].trim().equals("text")){ + String [] tokens = array[1].trim().split(" "); + if(tokens.length > 0){ + for(String word : tokens){ + word = word.trim(); + if(word.length() >= 3){ + addWordInMap(word); + resWord.get(bId).add(word); + userWord.get(userId).add(word); + } + } + } + } + if(countl % 100000 == 0) + System.out.println("line : "+countl); + } + System.out.println("Total No. of Reviews : " + countR); + } + + public static void getMapStats(Map> enWord){ + int potentialResWordCells = 0; + int min=100000000; + System.out.println("No. of entities = " + enWord.keySet().size()); + System.out.println("No. of total words in Vocab = " + words.size()); + + for(String en : enWord.keySet()){ + min = (enWord.get(en).size() < min) ? enWord.get(en).size() : min; + for(String word : enWord.get(en)){ + potentialResWordCells++; + } + + } + System.out.println("Potential Entity-Word Cells : " + potentialResWordCells); + System.out.println("Min No. of Words in Entity : " + min); + } + + // Remove words from Map[Entity -> Set[words]] that occur few times in dictionary. If Set of words for entity go empty, remove Entity from Map. 
+ public static void pruneEntityWordMap(Map> enWord){ + Iterator it = enWord.keySet().iterator(); + while(it.hasNext()){ + String en = it.next(); + Iterator itr = enWord.get(en).iterator(); + while(itr.hasNext()){ + String word = itr.next(); + if(!words.contains(word)) + itr.remove(); + } + if(enWord.get(en).size() == 0) + it.remove(); + } + } + + + // Make a Array of Words that have frequency above the given threshold. + public static void makePrunedWordList(int occThresh){ + words = new HashSet(); int count = 0; + for(String word : wordCount.keySet()){ + if(wordCount.get(word) > occThresh){ + count++; + words.add(word); + } + } + System.out.println("Words with greater than occurence of " + occThresh + " : " + words.size()); + } + + public static int countLines(String file) throws IOException{ + BufferedReader br = new BufferedReader(new FileReader(file)); + String line; int count = 0; + while((line = br.readLine()) != null){ + count++; + } + return count; + } + + + public static void main(String [] args) throws IOException{ + + String reviewData = System.getProperty("user.dir")+"/../Dataset/data/ON/reviews_textProc.txt"; + readData(reviewData); + + int occThresh = 1; + System.out.println("Total Words in Review Data : " + wordCount.keySet().size()); + int count = 0; + + /*for(occThresh = 0; occThresh <= 50; occThresh++){ + makePrunedWordList(occThresh); + }*/ + + makePrunedWordList(4); + pruneEntityWordMap(resWord); + getMapStats(resWord); + + //pruneEntityWordMap(userWord, occThresh); + //getMapStats(userWord); + } +} diff --git a/Project/src/yelpDataProcessing/reviewJson.java b/Project/src/yelpDataProcessing/reviewJson.java new file mode 100644 index 0000000..8add377 --- /dev/null +++ b/Project/src/yelpDataProcessing/reviewJson.java @@ -0,0 +1,131 @@ +package yelpDataProcessing; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.FileWriter; +import 
java.io.IOException; +import java.io.StringReader; +import java.util.*; + +import javax.json.Json; +import javax.json.JsonArray; +import javax.json.JsonNumber; +import javax.json.JsonObject; +import javax.json.JsonReader; +import javax.json.JsonString; +import javax.json.JsonValue; + +import logisticCMF.Cell; + +public class reviewJson { + + public static HashSet resIds = new HashSet(); // Set - [Restaurant Ids] + + // Reads Yelp Dataset Json - extracts only Review Jsons + public static void extractReviewJson(String fileAddress) throws IOException{ + BufferedReader br = new BufferedReader(new FileReader(fileAddress)); + BufferedWriter bw = new BufferedWriter(new FileWriter(System.getProperty("user.dir") + "/Data/json/yelp_reviews.txt")); + String line; + int count=1; + while( ((line = br.readLine()) != null) && count < 1199228){ + if(count <= 73770) + count++; + else{ + bw.write(line+"\n"); + count++; + } + } + bw.close(); + br.close(); + } + + // Makes resIds Set - Then extracts json objects for restaurant reviews and stores them in file + public static void extractResReviewJson() throws IOException{ + makeResIds(); + BufferedReader br = new BufferedReader(new FileReader(System.getProperty("user.dir") + "/Data/json/yelp_reviews.txt")); + BufferedWriter bw = new BufferedWriter(new FileWriter(System.getProperty("user.dir") + "/Data/json/yelp_reviews_restaurants.txt")); + String line; + int count = 0; + while(( (line = br.readLine()) != null) ){ + JsonReader reader = Json.createReader(new StringReader(line)); + JsonObject object = reader.readObject(); + if(object.get("type").toString().equals("\"review\"")){ + JsonValue b_id = object.get("business_id"); + JsonString bus_id = (JsonString) b_id; + String bId = bus_id.getString(); + if(resIds.contains(bId)){ + bw.write(line.trim() + "\n"); + count++; + } + } + } + + System.out.println("Count : " + count); + br.close(); + bw.close(); + } + + // Reads file that contains Restaurant Ids and stores in Set - resIds + public 
static void makeResIds() throws IOException{ + String fA = System.getProperty("user.dir") + "/Data/new/restaurant_ids"; + BufferedReader br = new BufferedReader(new FileReader(fA)); + String line; + while((line = br.readLine()) != null){ + resIds.add(line.trim()); + } + br.close(); + System.out.println("No. of res : " + resIds.size()); + } + + public static void putReviewDatatoFile(String fileAddress ) throws IOException{ + BufferedReader br = new BufferedReader(new FileReader(fileAddress)); + BufferedWriter bw = new BufferedWriter(new FileWriter(System.getProperty("user.dir") + "/../Data/new/res_review_data.txt")); + String line; + int count = 0; String relation = "restaurant-user"; + while((line = br.readLine()) != null){ + JsonReader reader = Json.createReader(new StringReader(line)); + JsonObject object = reader.readObject(); + + JsonValue b_id = object.get("business_id"); + JsonString bus_id = (JsonString) b_id; + String bId = bus_id.getString(); + bw.write("bus_id : " + bId + "\n"); + + JsonValue u_id = object.get("user_id"); + JsonString user_id = (JsonString) u_id; + String uId = user_id.getString(); + bw.write("user_id : " + uId + "\n"); + + + JsonValue star = object.get("stars"); + JsonNumber s = (JsonNumber) star; + Double st = s.doubleValue(); + bw.write("star : " + st + "\n"); + + JsonValue t = object.get("text"); + JsonString te = (JsonString) t; + String text = te.getString(); + bw.write("text: " + t + "\n\n"); + + count++; + } + br.close(); + bw.close(); + System.out.println(relation + " : " + count); + } + + + + public static void main(String [] args) throws IOException{ + //convertResAttLogisticReadableFile(System.getProperty("user.dir")+"/Data/yelp_dataset_restaurant_att"); + //String fileAddress = System.getProperty("user.dir") + "/../Dataset/yelp_dataset"; + String fileAddress = System.getProperty("user.dir") + "/../Data/json/yelp_reviews_restaurants.txt"; + //putReviewDatatoFile(fileAddress); + //extractReviewJson(fileAddress); + 
//extractResReviewJson(); + + } + +} diff --git a/PythonScript/clean_text.py b/PythonScript/clean_text.py new file mode 100644 index 0000000..c247d04 --- /dev/null +++ b/PythonScript/clean_text.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +## Inputs review file in format + +# businessId : asnhdjkasld +# userId : kjhdjkfadfjbasdbjfbasdb +# stars : 4.0 +# text : I was amazed with the quality of the food + +##### OUTPUTS the file in the same order, but tokenizes, removes punctuatuations, removes stop words and stems the words before outputing. Also each word in review occurs once in output. + + +from nltk.corpus import stopwords +from nltk.tokenize import word_tokenize +import re +import string +from nltk.stem import PorterStemmer + +stemmer = PorterStemmer() + +def getTokens(doc): + doc = re.sub("\d+", "", doc) + + tokenized = word_tokenize(doc.decode('utf-8')) + + regex = re.compile('[%s]' % re.escape(string.punctuation)) #see documentation here: http://docs.python.org/2/library/string.html + + tokenized_no_punctuation = [] + + new_review = [] + + + ## REMOVING PUNCTUATION + #for token in tokenized: + # new_token = regex.sub(u'', token.lower()) + # if not new_token == u'': + # tokenized_no_punctuation.append(new_token) + + tokenized_no_punctuation = [re.sub(r'[^A-Za-z0-9]+', '', x.lower()) for x in tokenized] + tokenized_no_punctuation = [s for s in tokenized_no_punctuation if (len(s)>1)] + + #print tokenized_no_punctuation + + token_no_stop = [] + ## REMOVING STOP WORDS + for word in tokenized_no_punctuation: + if not word in stopwords.words('english'): + try: + word = stemmer.stem(word.encode('utf-8')) + except UnicodeDecodeError: + word = word #.encode('utf-8') + token_no_stop.append(word.encode('utf-8')) + + + return token_no_stop + + + +fin = open('reviews.txt', 'r') +fout = open('reviews_textProc.txt', 'w') +count = 0 +for line in fin: +# print count + if(count %100000 == 0): + print count + if(line.strip().split(':')[0] == 'text'): + 
tokens = getTokens(line.strip().split(':')[1]) + tokenSet = set() + for i in tokens: + if (len(i) >= 3): + tokenSet.add(i) + fout.write("text : ") + fout.write(" ".join(tokenSet)) + fout.write("\n\n") + else: + fout.write(line) + count = count + 1 diff --git a/PythonScript/clean_text.py~ b/PythonScript/clean_text.py~ new file mode 100644 index 0000000..c247d04 --- /dev/null +++ b/PythonScript/clean_text.py~ @@ -0,0 +1,78 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +## Inputs review file in format + +# businessId : asnhdjkasld +# userId : kjhdjkfadfjbasdbjfbasdb +# stars : 4.0 +# text : I was amazed with the quality of the food + +##### OUTPUTS the file in the same order, but tokenizes, removes punctuatuations, removes stop words and stems the words before outputing. Also each word in review occurs once in output. + + +from nltk.corpus import stopwords +from nltk.tokenize import word_tokenize +import re +import string +from nltk.stem import PorterStemmer + +stemmer = PorterStemmer() + +def getTokens(doc): + doc = re.sub("\d+", "", doc) + + tokenized = word_tokenize(doc.decode('utf-8')) + + regex = re.compile('[%s]' % re.escape(string.punctuation)) #see documentation here: http://docs.python.org/2/library/string.html + + tokenized_no_punctuation = [] + + new_review = [] + + + ## REMOVING PUNCTUATION + #for token in tokenized: + # new_token = regex.sub(u'', token.lower()) + # if not new_token == u'': + # tokenized_no_punctuation.append(new_token) + + tokenized_no_punctuation = [re.sub(r'[^A-Za-z0-9]+', '', x.lower()) for x in tokenized] + tokenized_no_punctuation = [s for s in tokenized_no_punctuation if (len(s)>1)] + + #print tokenized_no_punctuation + + token_no_stop = [] + ## REMOVING STOP WORDS + for word in tokenized_no_punctuation: + if not word in stopwords.words('english'): + try: + word = stemmer.stem(word.encode('utf-8')) + except UnicodeDecodeError: + word = word #.encode('utf-8') + token_no_stop.append(word.encode('utf-8')) + + + return 
token_no_stop + + + +fin = open('reviews.txt', 'r') +fout = open('reviews_textProc.txt', 'w') +count = 0 +for line in fin: +# print count + if(count %100000 == 0): + print count + if(line.strip().split(':')[0] == 'text'): + tokens = getTokens(line.strip().split(':')[1]) + tokenSet = set() + for i in tokens: + if (len(i) >= 3): + tokenSet.add(i) + fout.write("text : ") + fout.write(" ".join(tokenSet)) + fout.write("\n\n") + else: + fout.write(line) + count = count + 1 diff --git a/PythonScript/combineAllPredData.py b/PythonScript/combineAllPredData.py new file mode 100644 index 0000000..e652ecb --- /dev/null +++ b/PythonScript/combineAllPredData.py @@ -0,0 +1,29 @@ +# From Logisitc CMF : python PythonScript/combineAllPredData.py Embeddings_Prediction_Data HeldOut + +import sys +import os +from os import walk + +embeddingsPath = sys.argv[1]; +evToMerge = sys.argv[2]; +print embeddingsPath+"/All/pred-data/"+evToMerge +if not os.path.exists(embeddingsPath+"/All/pred-data/"+evToMerge): + os.makedirs(embeddingsPath+"/All/pred-data/"+evToMerge) + +foldersToMerge = ['AZ', 'NV', 'WI', 'EDH'] + +print embeddingsPath+"/WI/pred-data/"+evToMerge +files = [] +for (dirpath, dirnames, filenames) in walk(embeddingsPath+"/WI/pred-data/"+evToMerge): + files.extend(filenames) + break + +print files; + +for folder in foldersToMerge: + for filename in files: + with open(embeddingsPath+"/All/pred-data/"+evToMerge+"/"+filename, 'a') as outfile: + with open(embeddingsPath+"/"+folder+"/pred-data/"+evToMerge+"/"+filename) as infile: + for line in infile: + outfile.write(line) + diff --git a/PythonScript/combineAllPredData.py~ b/PythonScript/combineAllPredData.py~ new file mode 100644 index 0000000..e652ecb --- /dev/null +++ b/PythonScript/combineAllPredData.py~ @@ -0,0 +1,29 @@ +# From Logisitc CMF : python PythonScript/combineAllPredData.py Embeddings_Prediction_Data HeldOut + +import sys +import os +from os import walk + +embeddingsPath = sys.argv[1]; +evToMerge = sys.argv[2]; +print 
embeddingsPath+"/All/pred-data/"+evToMerge +if not os.path.exists(embeddingsPath+"/All/pred-data/"+evToMerge): + os.makedirs(embeddingsPath+"/All/pred-data/"+evToMerge) + +foldersToMerge = ['AZ', 'NV', 'WI', 'EDH'] + +print embeddingsPath+"/WI/pred-data/"+evToMerge +files = [] +for (dirpath, dirnames, filenames) in walk(embeddingsPath+"/WI/pred-data/"+evToMerge): + files.extend(filenames) + break + +print files; + +for folder in foldersToMerge: + for filename in files: + with open(embeddingsPath+"/All/pred-data/"+evToMerge+"/"+filename, 'a') as outfile: + with open(embeddingsPath+"/"+folder+"/pred-data/"+evToMerge+"/"+filename) as infile: + for line in infile: + outfile.write(line) + diff --git a/PythonScript/getPRCurveData.py b/PythonScript/getPRCurveData.py new file mode 100644 index 0000000..8f1ee6d --- /dev/null +++ b/PythonScript/getPRCurveData.py @@ -0,0 +1,28 @@ +# From Logistic_CMF : python PythonScript/getPRCurveData.py Embeddings_Prediction_Data All HeldOut + +from os import walk +import sys +import os + +embeddingsPath = sys.argv[1] +folderToTest = sys.argv[2] +evToTest = sys.argv[3] + +path = embeddingsPath+"/"+folderToTest+"/pred-data/"+evToTest + +files = [] +for (dirpath, dirnames, filenames) in walk(path): + files.extend(filenames) + break + +for fileName in files: + f = open(path+"/"+fileName, 'r') + if not os.path.exists(path+"/PRCurve"): + os.makedirs(path+"/PRCurve") + o = open(path+"/PRCurve/"+fileName, 'w') + for line in f: + line.strip(); + a = line.split("::") + o.write(a[2].strip() + "\t" + a[3].strip() + "\n"); + o.close(); + f.close(); diff --git a/PythonScript/getPRCurveData.py~ b/PythonScript/getPRCurveData.py~ new file mode 100644 index 0000000..8f1ee6d --- /dev/null +++ b/PythonScript/getPRCurveData.py~ @@ -0,0 +1,28 @@ +# From Logistic_CMF : python PythonScript/getPRCurveData.py Embeddings_Prediction_Data All HeldOut + +from os import walk +import sys +import os + +embeddingsPath = sys.argv[1] +folderToTest = sys.argv[2] +evToTest = 
sys.argv[3] + +path = embeddingsPath+"/"+folderToTest+"/pred-data/"+evToTest + +files = [] +for (dirpath, dirnames, filenames) in walk(path): + files.extend(filenames) + break + +for fileName in files: + f = open(path+"/"+fileName, 'r') + if not os.path.exists(path+"/PRCurve"): + os.makedirs(path+"/PRCurve") + o = open(path+"/PRCurve/"+fileName, 'w') + for line in f: + line.strip(); + a = line.split("::") + o.write(a[2].strip() + "\t" + a[3].strip() + "\n"); + o.close(); + f.close(); diff --git a/PythonScript/getPRF.py b/PythonScript/getPRF.py new file mode 100644 index 0000000..c51be2c --- /dev/null +++ b/PythonScript/getPRF.py @@ -0,0 +1,22 @@ +# From Logistic CMF : python PythonScript/getPRF.py Embeddings_Prediction_Data WI HeldOut + +import prf; +from os import walk +import sys + +embeddingsPath = sys.argv[1] +folderToTest = sys.argv[2] +evToTest = sys.argv[3] + +path = embeddingsPath+"/"+folderToTest+"/pred-data/"+evToTest +print path +fs = [] +for (dirpath, dirnames, filenames) in walk(path): + fs.extend(filenames) + break +fs.sort() +for fileName in fs: + print fileName + print prf.getPRF(path+"/"+fileName) + + diff --git a/PythonScript/getPRF.py~ b/PythonScript/getPRF.py~ new file mode 100644 index 0000000..c51be2c --- /dev/null +++ b/PythonScript/getPRF.py~ @@ -0,0 +1,22 @@ +# From Logistic CMF : python PythonScript/getPRF.py Embeddings_Prediction_Data WI HeldOut + +import prf; +from os import walk +import sys + +embeddingsPath = sys.argv[1] +folderToTest = sys.argv[2] +evToTest = sys.argv[3] + +path = embeddingsPath+"/"+folderToTest+"/pred-data/"+evToTest +print path +fs = [] +for (dirpath, dirnames, filenames) in walk(path): + fs.extend(filenames) + break +fs.sort() +for fileName in fs: + print fileName + print prf.getPRF(path+"/"+fileName) + + diff --git a/PythonScript/prf.py b/PythonScript/prf.py new file mode 100644 index 0000000..2373821 --- /dev/null +++ b/PythonScript/prf.py @@ -0,0 +1,39 @@ +## Call getPRF(fileName) to get [P, R, F] + +import 
numpy as np; +from sklearn.metrics import precision_recall_fscore_support as prf + +def readFile(fileName): + y_pred = [] + y_true = [] + y_pred_true = [] + f = open(fileName, 'r'); + for line in f: + line.strip(); + a = line.split("::") + if(float(a[2].strip()) >= 0.5): + pred = 1; + y_pred.append(pred); + else: + pred = 0; + y_pred.append(pred); + + true = float(a[3].strip()); + y_true.append(true); + y_pred_true.append(y_pred) + y_pred_true.append(y_true) + return y_pred_true + +def getPRF(fileName): + y_p_t = readFile(fileName) + y_pred = y_p_t[0]; + y_true = y_p_t[1]; + acc = prf(y_true, y_pred, average = 'micro'); + p = round(acc[0]*100, 1); + r = round(acc[1]*100, 1); + f = round(acc[2]*100, 1); + return np.array([p,r,f]) + +#fileName = "A-A" +#print getPRF(fileName) + diff --git a/PythonScript/prf.pyc b/PythonScript/prf.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c908aca47a7b3fa754bb9e7690f692013c1398c7 GIT binary patch literal 1302 zcmb_a!EVz)5S_IXCk>5SR0=8}A#uv7kvLWe5Jf#fR5_Ipl~82an`B$ZHoF@{B1@GI#+@N z$^)8(blxD9h4c{%P0AVqA`Tt)n%HK3;TiN4pnm{US)CeZ%3=hNZM;mao31!YZIwy0Kg(V)7;8&t7JS@fTGb4csTA zU5DZT51h1_2oIhNdu?Pj@SLo5T;`)A!#^a-&-bT>mkeVQ!bmAS*QBg;;kmHRTT`u3 zl^ZW;oR^9B5mu85aaoTm>G`g)$@L(aX<1y1(1mORy`5`+10iegNalR|SnkVBp@nB6 zQ`yKoIIM?1iE+Lz?$l`>yWlM5rN z`2w)k2mO06{}=LlF>2$U0aB3QF>bXh= 0.5): + pred = 1; + y_pred.append(pred); + else: + pred = 0; + y_pred.append(pred); + + true = float(a[3].strip()); + y_true.append(true); + y_pred_true.append(y_pred) + y_pred_true.append(y_true) + return y_pred_true + +def getPRF(fileName): + y_p_t = readFile(fileName) + y_pred = y_p_t[0]; + y_true = y_p_t[1]; + acc = prf(y_true, y_pred, average = 'micro'); + p = round(acc[0]*100, 1); + r = round(acc[1]*100, 1); + f = round(acc[2]*100, 1); + return np.array([p,r,f]) + +#fileName = "A-A" +#print getPRF(fileName) + diff --git a/PythonScript/writePRFTable.py b/PythonScript/writePRFTable.py new file mode 100644 index 0000000..e89284e --- /dev/null +++ 
b/PythonScript/writePRFTable.py @@ -0,0 +1,36 @@ +# From Logisitc CMF : python PythonScript/writePRFTable.py Embeddings_Prediction_Data A HeldOut + +import sys +import os +from os import walk +import prf + +embeddingsPath = sys.argv[1] +relation = sys.argv[2] +evToTest = sys.argv[3] + +writePath = embeddingsPath+"/Tables/"+relation+"-"+evToTest +out = open(writePath, 'w') + +foldersToWrite = ['AZ', 'NV', 'WI', 'EDH', 'All'] +filesToWrite = [] +for (dirpath, dirnames, filenames) in walk(embeddingsPath+"/WI/pred-data/"+evToTest): + for filename in filenames: + r = filename.split("-")[0].strip() + if r == relation: + filesToWrite.append(filename) + break + +for model in filesToWrite: + out.write("\\textbf{"+model.split("-")[1].strip()+"}\n") + for folder in foldersToWrite: + PRF = prf.getPRF(embeddingsPath+"/"+folder+"/pred-data/"+evToTest+"/"+model) + out.write("& " + str(PRF[0]) + "\t & " + str(PRF[1]) + "\t & " + str(PRF[2]) + "\n") + out.write("\\\\ \n") + + + + + + + diff --git a/PythonScript/writePRFTable.py~ b/PythonScript/writePRFTable.py~ new file mode 100644 index 0000000..e89284e --- /dev/null +++ b/PythonScript/writePRFTable.py~ @@ -0,0 +1,36 @@ +# From Logisitc CMF : python PythonScript/writePRFTable.py Embeddings_Prediction_Data A HeldOut + +import sys +import os +from os import walk +import prf + +embeddingsPath = sys.argv[1] +relation = sys.argv[2] +evToTest = sys.argv[3] + +writePath = embeddingsPath+"/Tables/"+relation+"-"+evToTest +out = open(writePath, 'w') + +foldersToWrite = ['AZ', 'NV', 'WI', 'EDH', 'All'] +filesToWrite = [] +for (dirpath, dirnames, filenames) in walk(embeddingsPath+"/WI/pred-data/"+evToTest): + for filename in filenames: + r = filename.split("-")[0].strip() + if r == relation: + filesToWrite.append(filename) + break + +for model in filesToWrite: + out.write("\\textbf{"+model.split("-")[1].strip()+"}\n") + for folder in foldersToWrite: + PRF = prf.getPRF(embeddingsPath+"/"+folder+"/pred-data/"+evToTest+"/"+model) + out.write("& 
" + str(PRF[0]) + "\t & " + str(PRF[1]) + "\t & " + str(PRF[2]) + "\n") + out.write("\\\\ \n") + + + + + + + diff --git a/README.md b/README.md new file mode 100644 index 0000000..ada5b5b --- /dev/null +++ b/README.md @@ -0,0 +1,21 @@ +# README # + +### What is this repository for? ### + +* Yelp Dataset Challenge + * Parse Yelp Json format data to get required data in the required format + * Perform logisitc CMF on the parsed data to predict relations. + +### Details on Logistic CMF Code ### + +* This Java project currently has 2 packages : + * yelpDataProcessing - Contains classes/functions to read the yelp dataset in json format and parse to get different data in required format. + * logisticCMF - Contains classes/functions to read data produced in required format and then split train/validation/test data. Learn the embeddings for entities and print prediction evaluation. + +* The folder PythonSCript contains a file cleantext.py that reads the yelp review data in user format and pre-processes the text review. + * The text pre-processing contains tokenization, stemming, removal of stop words and punctuations. + * Each word is kept only once if occurs multiple times in a review. + +#### Project Contributors #### +* Nitish Gupta +* Sameer Singh \ No newline at end of file