From 3abbbdd59f66077df3313d9da2a770aa5239d6a5 Mon Sep 17 00:00:00 2001 From: Jens Grivolla Date: Thu, 25 Apr 2013 18:22:55 +0200 Subject: [PATCH] initial commit of simple myrrix example (not really tested) --- myrrix-web/pom.xml | 44 ++ myrrix-web/src/main/webapp/WEB-INF/web.xml | 203 +++++++++ myrrix-web/src/main/webapp/error.jspx | 39 ++ myrrix-web/src/main/webapp/index.jspx | 386 ++++++++++++++++++ myrrix-web/src/main/webapp/som.jspx | 134 ++++++ myrrix-web/src/main/webapp/status.jspx | 79 ++++ pom.xml | 35 ++ rescorer/pom.xml | 39 ++ .../rescorer/LongPairMultiplyRescorer.java | 28 ++ .../rescorer/MultiplyRescorer.java | 27 ++ .../rescorer/MultiplyRescorerProvider.java | 121 ++++++ 11 files changed, 1135 insertions(+) create mode 100644 myrrix-web/pom.xml create mode 100644 myrrix-web/src/main/webapp/WEB-INF/web.xml create mode 100644 myrrix-web/src/main/webapp/error.jspx create mode 100644 myrrix-web/src/main/webapp/index.jspx create mode 100644 myrrix-web/src/main/webapp/som.jspx create mode 100644 myrrix-web/src/main/webapp/status.jspx create mode 100644 pom.xml create mode 100644 rescorer/pom.xml create mode 100644 rescorer/src/main/java/org/barcelonamedia/myrrixexample/rescorer/LongPairMultiplyRescorer.java create mode 100644 rescorer/src/main/java/org/barcelonamedia/myrrixexample/rescorer/MultiplyRescorer.java create mode 100644 rescorer/src/main/java/org/barcelonamedia/myrrixexample/rescorer/MultiplyRescorerProvider.java diff --git a/myrrix-web/pom.xml b/myrrix-web/pom.xml new file mode 100644 index 0000000..26aec45 --- /dev/null +++ b/myrrix-web/pom.xml @@ -0,0 +1,44 @@ + + + + myrrix-example-parent + org.barcelonamedia.myrrixexample + 0.1 + + 4.0.0 + myrrix-web + war + Myrrix Web (WAR) + 0.1 + Customized Myrrix WAR with rescorer + http://myrrix.com/documentation-serving-layer/ + 2012 + + myrrix-web + + + + net.myrrix + myrrix-web-common + 0.11 + + + org.barcelonamedia.myrrixexample + rescorer + ${project.version} + + + + + sonatype-nexus-releases + Sonatype Nexus Releases + https://oss.sonatype.org/content/repositories/releases + + + sonatype-nexus-snapshot + Sonatype Nexus Snapshots + https://oss.sonatype.org/content/repositories/snapshots + + + diff --git a/myrrix-web/src/main/webapp/WEB-INF/web.xml b/myrrix-web/src/main/webapp/WEB-INF/web.xml new file mode 100644 index 0000000..38f29d0 --- /dev/null +++ b/myrrix-web/src/main/webapp/WEB-INF/web.xml @@ -0,0 +1,203 @@ + + + + + + net.myrrix.web.InitListener.RESCORER_PROVIDER_CLASS + org.barcelonamedia.myrrixexample.rescorer.MultiplyRescorerProvider + + + + + Myrrix Serving Layer web application + Myrrix + + + + + net.myrrix.web.InitListener + + + + PreferenceServlet + net.myrrix.web.servlets.PreferenceServlet + 1 + + + IngestServlet + net.myrrix.web.servlets.IngestServlet + 1 + + /tmp + + + + RecommendServlet + net.myrrix.web.servlets.RecommendServlet + 1 + + + RecommendToManyServlet + net.myrrix.web.servlets.RecommendToManyServlet + 1 + + + RecommendToAnonymousServlet + net.myrrix.web.servlets.RecommendToAnonymousServlet + 1 + + + SimilarityServlet + net.myrrix.web.servlets.SimilarityServlet + 1 + + + EstimateServlet + net.myrrix.web.servlets.EstimateServlet + 1 + + + BecauseServlet + net.myrrix.web.servlets.BecauseServlet + 1 + + + RefreshServlet + net.myrrix.web.servlets.RefreshServlet + 1 + + + ReadyServlet + net.myrrix.web.servlets.ReadyServlet + 1 + + + AllUserIDsServlet + net.myrrix.web.servlets.AllUserIDsServlet + 1 + + + AllItemIDsServlet + net.myrrix.web.servlets.AllItemIDsServlet + 1 + + + LogServlet + net.myrrix.web.servlets.LogServlet + 1 + + + + PreferenceServlet + /pref/* + + + IngestServlet + /ingest/* + + + RecommendServlet + /recommend/* + + + RecommendToManyServlet + /recommendToMany/* + + + RecommendToAnonymousServlet + /recommendToAnonymous/* + + + SimilarityServlet + /similarity/* + + + EstimateServlet + /estimate/* + + + BecauseServlet + /because/* + + + RefreshServlet + /refresh/* + + + ReadyServlet + /ready + + + AllUserIDsServlet + /user/allIDs + + + AllItemIDsServlet + /item/allIDs + + + LogServlet + /log.txt + + + + index.jspx + + + + java.lang.Throwable + /error.jspx + + + 400 + /error.jspx + + + 401 + /error.jspx + + + 404 + /error.jspx + + + 405 + /error.jspx + + + 500 + /error.jspx + + + 503 + /error.jspx + + + \ No newline at end of file diff --git a/myrrix-web/src/main/webapp/error.jspx b/myrrix-web/src/main/webapp/error.jspx new file mode 100644 index 0000000..e5eca76 --- /dev/null +++ b/myrrix-web/src/main/webapp/error.jspx @@ -0,0 +1,39 @@ + + + + + + +]]> + + +Error + + + +

Error ${pageContext.errorData.statusCode} : ${pageContext.errorData.requestURI}

+

+
+Throwable t = pageContext.getErrorData().getThrowable();
+if (t != null) {
+  t.printStackTrace(new PrintWriter(out));
+}
+
+

+ + +
diff --git a/myrrix-web/src/main/webapp/index.jspx b/myrrix-web/src/main/webapp/index.jspx new file mode 100644 index 0000000..85b4690 --- /dev/null +++ b/myrrix-web/src/main/webapp/index.jspx @@ -0,0 +1,386 @@ + + + + + + + + + + + + + + + +]]> + +]]> + +Myrrix Serving Layer + + + + + + + + +
+ +

Myrrix Serving Layer

+ +
+ +

Machine

+ + +System.gc(); // Helps make the heap reported more reliable: +pageContext.setAttribute("jvmEnv", new JVMEnvironment()); + +ServerRecommender rec = (ServerRecommender) application.getAttribute(AbstractMyrrixServlet.RECOMMENDER_KEY); +pageContext.setAttribute("rec", rec); + +boolean readOnly = (Boolean) application.getAttribute(AbstractMyrrixServlet.READ_ONLY_KEY); + +String localInputDir = String.valueOf(application.getAttribute(AbstractMyrrixServlet.LOCAL_INPUT_DIR_KEY)); +pageContext.setAttribute("localInputDir", localInputDir); + +String localInputDirDisplay; +if (localInputDir.length() > 32) { + localInputDirDisplay = localInputDir.substring(0, 32) + "..."; +} else { + localInputDirDisplay = localInputDir; +} +pageContext.setAttribute("localInputDirDisplay", localInputDirDisplay); + +RunningAverage estimateError = (RunningAverage) application.getAttribute(PreferenceServlet.AVG_ESTIMATE_ERROR_KEY); +float avgEstimateError = estimateError == null ? 0.0f : (float) estimateError.getAverage(); +if (Float.isNaN(avgEstimateError)) { + avgEstimateError = 0.0f; +} +pageContext.setAttribute("avgEstimateError", avgEstimateError); + +pageContext.setAttribute("partition", application.getAttribute(AbstractMyrrixServlet.PARTITION_KEY)); +ReloadingReference<List<?>> partitionRef = + (ReloadingReference<List<?>>) application.getAttribute(AbstractMyrrixServlet.ALL_PARTITIONS_REF_KEY); +int numPartitions = partitionRef == null ? 0 : partitionRef.get().size(); +pageContext.setAttribute("numPartitions", numPartitions); + + + + + + + + + if (numPartitions > 1) { + + + + } + +
Host${jvmEnv.hostName}
Cores${jvmEnv.numProcessors}
Heap Used${jvmEnv.usedMemoryMB}MB (${jvmEnv.percentUsedMemory}%)
LogsView
Partition#${partition} (${numPartitions} total)
+ +
+ +

Recommender

+ + + + if (rec.getBucket() != null || rec.getInstanceID() != null) { + + + + + } + + + + + + +
Bucket${rec.bucket}
Instance ID${rec.instanceID}
Local Input Dir${localInputDirDisplay}
Ready?${rec.ready}
User Count${rec.generationManager.currentGeneration.numUsers}
Item Count${rec.generationManager.currentGeneration.numItems}
Avg. Estimate Error (beta)${avgEstimateError}
+ +
+ +
+ +

Endpoint Stats +

+ + + +
+ + +

Feature Space Map +

+ + + +
+ +

Test

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + if (readOnly) { + + + + + + } else { + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } + + +
&nbsp;&nbsp;User ID
/recommend/&#x27AC;

&nbsp;&nbsp;Item ID 1&nbsp;&nbsp;(Item ID 2)
/recommendToAnonymous//&#x27AC;

&nbsp;&nbsp;Item ID 1&nbsp;&nbsp;(Item ID 2)
/similarity//&#x27AC;

&nbsp;&nbsp;User ID&nbsp;&nbsp;Item ID
/estimate//&#x27AC;

&nbsp;&nbsp;User ID&nbsp;&nbsp;Item ID
/because//&#x27AC;

(Read-only Mode)
&nbsp;&nbsp;User ID&nbsp;&nbsp;Item ID
/pref//&#x27AC;

/ingest + + &#x27AC;

/ingest + + &#x27AC;

/refresh&#x27AC;
+ +
+ +
+
[]
+
+ +
+ +

&copy; Myrrix Ltd, except for included third-party open source software. +Full details of licensing at http://myrrix.com/legal/

+ +
+ + +
\ No newline at end of file diff --git a/myrrix-web/src/main/webapp/som.jspx b/myrrix-web/src/main/webapp/som.jspx new file mode 100644 index 0000000..845afe8 --- /dev/null +++ b/myrrix-web/src/main/webapp/som.jspx @@ -0,0 +1,134 @@ + + + + + + + + + + + + + +]]> + +]]> + +Myrrix Serving Layer + + + + +String maxMapSizeParam = request.getParameter("maxMapSize"); +int maxMapSize = maxMapSizeParam == null ? 40 : Integer.parseInt(maxMapSizeParam); +int nodeMaxSize = 9; +boolean userMatrix = "user".equalsIgnoreCase(request.getParameter("matrix")); +ServerRecommender rec = (ServerRecommender) application.getAttribute(AbstractMyrrixServlet.RECOMMENDER_KEY); +Generation generation = rec.getGenerationManager().getCurrentGeneration(); +if (generation != null) { + FastByIDMap<float[]> matrix = userMatrix ? generation.getX() : generation.getY(); + if (matrix != null && !matrix.isEmpty()) { + Node[][] map = new SelfOrganizingMaps().buildSelfOrganizedMap(userMatrix ? generation.getX() : generation.getY(), + maxMapSize); + + + + for (Node[] mapRow : map) { + + + for (Node node : mapRow) { + float[] projection3D = node.getProjection3D(); + String hexColor = '#' + + percentageToHexByte(projection3D[0]) + + percentageToHexByte(projection3D[1]) + + percentageToHexByte(projection3D[2]); + pageContext.setAttribute("hexColor", hexColor); + + + } + + } +
+ + List<Pair<Double,Long>> contentIDs = node.getAssignedIDs(); + int n = FastMath.min(nodeMaxSize, contentIDs.size()); + if (n > 0) { + int rowSize = (int) FastMath.round(FastMath.sqrt(n)); + +
+ + Iterator<Pair<Double,Long>> it = contentIDs.iterator(); + int k = 0; + while (k < nodeMaxSize && it.hasNext()) { + out.write(String.valueOf(it.next().getSecond())); + out.write(' '); + if ((k+1) % rowSize == 0 && k != n-1) { + out.write("<br/>"); + } + k++; + } + if (it.hasNext()) { + out.write("..."); + } + +
+ + k = 0; + while (k < n) { + out.write('ยท'); + if ((k+1) % rowSize == 0 && k != n-1) { + out.write("<br/>"); + } + k++; + } + + } +
+ + } +} + + + + + 255) { + value = 255; + } + if (value < 16) { + return "0" + byteToHexChar(value); + } + return String.valueOf(byteToHexChar(value / 16)) + byteToHexChar(value % 16); +} +private static char byteToHexChar(int value) { + if (value < 10) { + return (char) ('0' + value); + } + return (char) ('A' + (value - 10)); +} +]]> + +
diff --git a/myrrix-web/src/main/webapp/status.jspx b/myrrix-web/src/main/webapp/status.jspx new file mode 100644 index 0000000..36fcfd0 --- /dev/null +++ b/myrrix-web/src/main/webapp/status.jspx @@ -0,0 +1,79 @@ + + + + + + + + + +]]> + +System.gc(); // Helps make the heap reported more reliable: +pageContext.setAttribute("jvmEnv", new JVMEnvironment()); +pageContext.setAttribute("rec", application.getAttribute(AbstractMyrrixServlet.RECOMMENDER_KEY)); + + + + ${jvmEnv.maxMemory} + ${jvmEnv.usedMemory} + ${jvmEnv.numProcessors} + + + ${rec.instanceID} + + + ${rec.ready} + ${rec.generationManager.currentGeneration.numUsers} + ${rec.generationManager.currentGeneration.numItems} + + + // Just looks awful doesn't it? + ReloadingReference<List<List<Pair<String,Integer>>>> ref = + (ReloadingReference<List<List<Pair<String,Integer>>>>) + application.getAttribute(AbstractMyrrixServlet.ALL_PARTITIONS_REF_KEY); + if (ref != null) { + + + + pageContext.setAttribute("thisPartition", application.getAttribute(AbstractMyrrixServlet.PARTITION_KEY)); + int partitionNumber = 0; + for (List<Pair<String,Integer>> partition : ref.get()) { + pageContext.setAttribute("partitionNumber", partitionNumber); + + ${thisPartition} + + ${partitionNumber} + + for (Pair<String,Integer> replica : partition) { + pageContext.setAttribute("replica", replica); + + ${replica.first}:${replica.second} + + } + + + + partitionNumber++; + } + + + + } + + + \ No newline at end of file diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..68929c5 --- /dev/null +++ b/pom.xml @@ -0,0 +1,35 @@ + + 4.0.0 + + org.barcelonamedia.myrrixexample + myrrix-example-parent + pom + 0.1 + Multi module myrrix example parent + + + rescorer + myrrix-web + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + 1.6 + 1.6 + + + + + + + UTF-8 + + diff --git a/rescorer/pom.xml b/rescorer/pom.xml new file mode 100644 index 0000000..b3457bb --- /dev/null +++ b/rescorer/pom.xml @@ -0,0 +1,39 @@ + + + myrrix-example-parent + org.barcelonamedia.myrrixexample + 0.1 + + 4.0.0 + rescorer + Simple Myrrix Rescorer + 0.1 + + + net.myrrix + myrrix-online + 0.11 + provided + + + commons-logging + commons-logging + 1.1.1 + jar + compile + + + + + sonatype-nexus-releases + Sonatype Nexus Releases + https://oss.sonatype.org/content/repositories/releases + + + sonatype-nexus-snapshot + Sonatype Nexus Snapshots + https://oss.sonatype.org/content/repositories/snapshots + + + \ No newline at end of file diff --git a/rescorer/src/main/java/org/barcelonamedia/myrrixexample/rescorer/LongPairMultiplyRescorer.java b/rescorer/src/main/java/org/barcelonamedia/myrrixexample/rescorer/LongPairMultiplyRescorer.java new file mode 100644 index 0000000..5c9cfe1 --- /dev/null +++ b/rescorer/src/main/java/org/barcelonamedia/myrrixexample/rescorer/LongPairMultiplyRescorer.java @@ -0,0 +1,28 @@ +package org.barcelonamedia.myrrixexample.rescorer; + +import net.myrrix.common.collection.FastByIDMap; + +import org.apache.mahout.cf.taste.recommender.Rescorer; +import org.apache.mahout.common.LongPair; + +public class LongPairMultiplyRescorer implements Rescorer { + + FastByIDMap scores; + + public LongPairMultiplyRescorer(FastByIDMap scores) { + super(); + this.scores = scores; + } + + public boolean isFiltered(LongPair itemIds) { + return !scores.containsKey(itemIds.getFirst()); + } + + /** + * rescores items by score map, fails if no score in map (check with isFiltered() first) + */ + public double rescore(LongPair itemIds, double orgScore) { + return orgScore * scores.get(itemIds.getFirst()); + } + +} diff --git a/rescorer/src/main/java/org/barcelonamedia/myrrixexample/rescorer/MultiplyRescorer.java b/rescorer/src/main/java/org/barcelonamedia/myrrixexample/rescorer/MultiplyRescorer.java new file mode 100644 index 0000000..6dcdc20 --- /dev/null +++ b/rescorer/src/main/java/org/barcelonamedia/myrrixexample/rescorer/MultiplyRescorer.java @@ -0,0 +1,27 @@ +package org.barcelonamedia.myrrixexample.rescorer; + +import net.myrrix.common.collection.FastByIDMap; + +import org.apache.mahout.cf.taste.recommender.IDRescorer; + +public class MultiplyRescorer implements IDRescorer { + + FastByIDMap scores; + + public boolean isFiltered(long itemId) { + return !scores.containsKey(itemId); + } + + /** + * rescores items by score map, fails if no score in map (check with isFiltered() first) + */ + public double rescore(long itemId, double orgScore) { + return orgScore * scores.get(itemId); + } + + public MultiplyRescorer(FastByIDMap scores) { + super(); + this.scores = scores; + } + +} diff --git a/rescorer/src/main/java/org/barcelonamedia/myrrixexample/rescorer/MultiplyRescorerProvider.java b/rescorer/src/main/java/org/barcelonamedia/myrrixexample/rescorer/MultiplyRescorerProvider.java new file mode 100644 index 0000000..09cc416 --- /dev/null +++ b/rescorer/src/main/java/org/barcelonamedia/myrrixexample/rescorer/MultiplyRescorerProvider.java @@ -0,0 +1,121 @@ +/** + * + */ +package org.barcelonamedia.myrrixexample.rescorer; + +import net.myrrix.common.MyrrixRecommender; +import net.myrrix.common.collection.FastByIDMap; +import net.myrrix.online.RescorerProvider; +import net.myrrix.online.ServerRecommender; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.mahout.cf.taste.recommender.IDRescorer; +import org.apache.mahout.cf.taste.recommender.Rescorer; +import org.apache.mahout.common.LongPair; + +/** + * Simple rescorer that receives (id,score) tuples and multiplies myrrix scores with those + * Originally written for Myrrix <= 0.10 and adapted to work with 0.11 + * @author jens.grivolla + * + */ +public class MultiplyRescorerProvider implements RescorerProvider { + + /** + * Logger for this class and subclasses. + */ + protected final Log log = LogFactory.getLog(getClass()); + + /** + * @param args comma separated list of id-score pairs (each in id:score format) using string ids + * @return map of long id to score, will fail terribly if the format is invalid + */ + private FastByIDMap argsToMap(String[] args) { + FastByIDMap scoreMap = new FastByIDMap(); + for (String arg: args) { + for (String score: arg.split(",")) { + String[] idVal = score.split(":"); + if (idVal.length == 2) { + scoreMap.put(Long.parseLong(idVal[0]), Double.parseDouble(idVal[1])); + } + } + } + return scoreMap; + } + + /** + * @param userIDs user(s) for which recommendations are being made, which may be needed in the rescoring logic. + * @param args arguments, if any, that should be used when making the {@link IDRescorer}. This is additional + * information from the request that may be necessary to its logic, like current location. What it means + * is up to the implementation. + * @return {@link IDRescorer} to use with {@link ServerRecommender#recommend(long, int, IDRescorer)} + * or {@code null} if none should be used. The resulting {@link IDRescorer} will be passed each candidate + * item ID to {@link IDRescorer#isFiltered(long)}, and each non-filtered candidate with its original score + * to {@link IDRescorer#rescore(long, double)} + */ + public IDRescorer getRecommendRescorer(long[] userIDs, String... args) { + if (args.length == 0) { + return null; + } + return new MultiplyRescorer(argsToMap(args)); + } + + /** + * @param itemIDs items that the anonymous user is associated to + * @param args arguments, if any, that should be used when making the {@link IDRescorer}. This is additional + * information from the request that may be necessary to its logic, like current location. What it means + * is up to the implementation. + * @return {@link IDRescorer} to use with {@link ServerRecommender#recommendToAnonymous(long[], int, IDRescorer)} + * or {@code null} if none should be used. The resulting {@link IDRescorer} will be passed each candidate + * item ID to {@link IDRescorer#isFiltered(long)}, and each non-filtered candidate with its original score + * to {@link IDRescorer#rescore(long, double)} + */ + public IDRescorer getRecommendToAnonymousRescorer(long[] itemIDs, String... args){ + return getRecommendRescorer(itemIDs, args); + } + + + /** + * @param args arguments, if any, that should be used when making the {@link IDRescorer}. This is additional + * information from the request that may be necessary to its logic, like current location. What it means + * is up to the implementation. + * @return {@link Rescorer} to use with {@link ServerRecommender#mostSimilarItems(long[], int, Rescorer)} + * or {@code null} if none should be used. The resulting {@code Rescorer<LongPair>} will be passed + * each candidate item ID pair (IDs of the two similar items) to {@link Rescorer#isFiltered(Object)}, + * and each non-filtered candidate item ID pair with its original score to + * {@link Rescorer#rescore(Object, double)} + */ + public Rescorer getMostSimilarItemsRescorer(String... args) { + if (args.length == 0) { + return null; + } + return new LongPairMultiplyRescorer(argsToMap(args)); + } + + @Override + public IDRescorer getMostPopularItemsRescorer(MyrrixRecommender arg0, + String... arg1) { + // TODO Auto-generated method stub + return null; + } + + @Override + public Rescorer getMostSimilarItemsRescorer(MyrrixRecommender arg0, + String... arg1) { + return getMostSimilarItemsRescorer(arg1); + } + + @Override + public IDRescorer getRecommendRescorer(long[] arg0, MyrrixRecommender arg1, + String... arg2) { + return getRecommendRescorer(arg0, arg2); + } + + @Override + public IDRescorer getRecommendToAnonymousRescorer(long[] arg0, + MyrrixRecommender arg1, String... arg2) { + return getRecommendToAnonymousRescorer(arg0, arg2); + } + +}