Skip to content

Commit 4d67c4a

Browse files
author
Brett Hazen
committed
Switch default search to YZ and add Schema creation foo
1 parent 3b2f2ea commit 4d67c4a

File tree

2 files changed

+89
-15
lines changed

2 files changed

+89
-15
lines changed

python_test.py

+58-15
Original file line numberDiff line numberDiff line change
@@ -9,23 +9,28 @@
99
from os import environ
1010
from sys import maxint
1111

12-
def ListTweets(bucket):
12+
def ListTweets(bucket, streaming=False):
1313
"""Dump all keys in the bucket to stdout"""
1414
decoder = bucket.get_decoder("application/json")
15-
allkeys = bucket.get_keys()
15+
if streaming:
16+
allkeys = bucket.stream_keys()
17+
allkeys = [item for sublist in allkeys for item in sublist]
18+
else:
19+
allkeys = bucket.get_keys()
1620
bucket.allow_mult = True
1721
count = 0
1822
if allkeys == None:
1923
print "No Tweets."
2024
return
25+
print "Total keys = {0}".format(len(allkeys))
2126
for key in allkeys:
2227
count = count + 1
2328
obj = bucket.get(key)
2429
for sibling in obj.siblings:
2530
json = sibling.encoded_data
26-
dict = decoder(json)
27-
print "[%d] - %s - %s at %s" % (count,key,dict['user'],dict['time'])
28-
print dict['tweet'].encode("utf-8")
31+
jsdict = decoder(json)
32+
print "[%d] - %s - %s at %s" % (count,key,jsdict['user'],jsdict['time'])
33+
print jsdict['tweet'].encode("utf-8")
2934
links = sibling.links
3035
for parent_link in links:
3136
print "LINK"
@@ -34,9 +39,9 @@ def ListTweets(bucket):
3439
for psibling in pobj.siblings:
3540
pjson = psibling.encoded_data
3641
if pjson != None:
37-
dict = decoder(pjson)
38-
print "\tPARENT - %s at %s" % (dict['user'],dict['time'])
39-
print "\t%s" % dict['tweet'].encode("utf-8")
42+
jsdict = decoder(pjson)
43+
print "\tPARENT - %s at %s" % (jsdict['user'],jsdict['time'])
44+
print "\t%s" % jsdict['tweet'].encode("utf-8")
4045

4146

4247
def DeleteTweets(bucket):
@@ -83,7 +88,8 @@ def LoadTweets(protocol, bucket, quantity, term):
8388
tweet = bucket.new(str(status.id), data={
8489
'tweet': status.text.encode("utf-8"),
8590
'user': status.user.screen_name,
86-
'time': dt.isoformat()
91+
'time': dt.isoformat() + 'Z'
92+
# Add 'Z' for Solr Compatibility
8793
})
8894
tweet.add_index('user_bin',status.user.screen_name)
8995
if parent != None:
@@ -93,7 +99,7 @@ def LoadTweets(protocol, bucket, quantity, term):
9399
tweet.add_link(parent_node)
94100
tweet.store()
95101

96-
def SearchTweets(client, bucket, term):
102+
def SearchOldTweets(client, bucket, term):
97103
# First parameter is the bucket we want to search within, the second
98104
# is the query we want to perform.
99105
print 'tweet:{0}'.format(term)
@@ -107,6 +113,26 @@ def SearchTweets(client, bucket, term):
107113
print "%d = %s - %s at %s" % (count, item['id'], item['user'], item['time'])
108114
print item['tweet']
109115

116+
117+
def SearchTweets(client, bucket, term):
    """Search the bucket through Riak 2.0 search (Yokozuna) and print each hit.

    ``term`` is handed to ``bucket.search`` unchanged; the expected query
    format is ``<key>:*<value>*``.  For every returned document the Riak key
    is read from the ``_yz_rk`` field, the object is fetched from the bucket,
    and every sibling is decoded as JSON and printed.

    ``client`` is accepted for signature parity with the other search
    helpers but is not used by this function.
    """
    results = bucket.search(term)
    decode_json = bucket.get_decoder("application/json")

    # Positions are 1-based and count documents, so all siblings of one
    # object share the same number.
    for position, hit in enumerate(results['docs'], 1):
        riak_key = hit['_yz_rk']
        for sibling in bucket.get(riak_key).siblings:
            tweet = decode_json(sibling.encoded_data)
            print("[%d] - %s - %s at %s" % (position, riak_key, tweet['user'], tweet['time']))
            print(tweet['tweet'].encode("utf-8"))
134+
135+
110136
def Search2iTweets(bucket, term):
111137
decoder = bucket.get_decoder("application/json")
112138
result = bucket.get_index('user_bin', term)
@@ -116,9 +142,9 @@ def Search2iTweets(bucket, term):
116142
count = count + 1
117143
obj = bucket.get(key)
118144
json = obj.get_encoded_data()
119-
dict = decoder(json)
120-
print "[%d] - %s at %s" % (count,dict['user'],dict['time'])
121-
print dict['tweet']
145+
jsdict = decoder(json)
146+
print "[%d] - %s at %s" % (count,jsdict['user'],jsdict['time'])
147+
print jsdict['tweet']
122148

123149
def MapReduceTweets(client, bucket, term):
124150
query = client.add(bucket.name)
@@ -129,6 +155,19 @@ def MapReduceTweets(client, bucket, term):
129155
# Print the key (``v.key``) and the value for that key (``data``).
130156
print "%s - %s" % (result[0], result[1])
131157

158+
def CreateSearchSchema(client, name, bucket_name='twitter', wait_seconds=5):
    """Upload a search schema, create its index, and attach it to a bucket.

    The schema XML is read from ``<name>.xml``.  It is registered with the
    client as schema ``name``, an index also called ``name`` is created from
    it, and finally the ``search_index`` property of ``bucket_name`` is set
    to that index.

    Args:
        client: Riak client exposing ``create_search_schema``,
            ``create_search_index`` and ``bucket``.
        name: Schema/index name; also the basename of the XML file to read.
        bucket_name: Bucket to associate with the index (default
            ``'twitter'``, the value that was previously hard-coded).
        wait_seconds: Pause between creating the index and attaching it to
            the bucket (default 5, the previously hard-coded delay).  Pass 0
            to skip the pause.

    Raises:
        IOError: if ``<name>.xml`` cannot be opened or read.
    """
    # Local import keeps this function self-contained regardless of which
    # modules the surrounding script imports.
    import time

    # ``with`` closes the file even if the read fails.
    with open(name + '.xml', 'r') as xml_file:
        schema_data = xml_file.read()

    client.create_search_schema(name, schema_data)
    client.create_search_index(name, name)

    # NOTE(review): the pause presumably gives the new index time to become
    # available before the bucket is pointed at it -- confirm the required
    # delay against the cluster in use.
    if wait_seconds:
        time.sleep(wait_seconds)

    # Associate the bucket with the index that was just created (previously
    # this always used the literal 'twitter', ignoring ``name``).
    bucket = client.bucket(bucket_name)
    bucket.set_property('search_index', name)
169+
170+
132171
# MAIN
133172
parser = argparse.ArgumentParser(description='Brew us some fresh, hot Riak!')
134173
parser.add_argument('-p','--dump', help='Dump all tweets', action='store_true')
@@ -138,10 +177,11 @@ def MapReduceTweets(client, bucket, term):
138177
parser.add_argument('--host', help='Hostname', default='localhost')
139178
parser.add_argument('-t','--http', type=int, help='HTTP port number', default=10018)
140179
parser.add_argument('-b','--pbc', type=int, help='Protocol Buffer port number', default=10017)
141-
parser.add_argument('--protocol', help='Name of transport protocol to use', default='http', choices=['http','https','pbc'])
180+
parser.add_argument('--protocol', help='Name of transport protocol to use', default='pbc', choices=['http','pbc'])
142181
parser.add_argument('-x','--delete', help='Delete all tweets', action='store_true')
143182
parser.add_argument('-2','--twoi', help='Query 2i')
144183
# Double-quoted so the apostrophe survives: 'user''s' is two adjacent string
# literals, which concatenate to "users" in the rendered help text.
parser.add_argument('-mr','--mapreduce', help="Test MapReduce to look for a user's tweets")
184+
parser.add_argument('-sch','--schema', help='Create a YZ search schema from XML file')
145185
args = parser.parse_args()
146186
print args
147187

@@ -161,7 +201,7 @@ def MapReduceTweets(client, bucket, term):
161201
DeleteTweets(bucket)
162202
elif args.dump:
163203
print "Dumping all existing tweets in Riak"
164-
ListTweets(bucket)
204+
ListTweets(bucket, False)
165205
elif args.twoi != None:
166206
Search2iTweets(bucket, args.twoi)
167207
elif args.mapreduce != None:
@@ -172,3 +212,6 @@ def MapReduceTweets(client, bucket, term):
172212
elif args.search != None:
173213
print "Searching for term '%s' in loaded tweets" % args.search
174214
SearchTweets(client, bucket, args.search)
215+
elif args.schema != None:
216+
print "Creating schema '%s'" % args.schema
217+
CreateSearchSchema(client, args.schema)

twitter.xml

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
<?xml version="1.0" encoding="UTF-8" ?>
2+
<schema name="twitter" version="1.5">
3+
<fields>
4+
<field name="tweet" type="string" indexed="true" stored="false"/>
5+
<field name="user" type="string" indexed="true" stored="false"/>
6+
<field name="time" type="date" indexed="true" stored="false"/>
7+
8+
<!-- All of these fields are required by Riak Search -->
9+
<field name="_yz_id" type="_yz_str" indexed="true" stored="true" required="true"/>
10+
<field name="_yz_ed" type="_yz_str" indexed="true" stored="false"/>
11+
<field name="_yz_pn" type="_yz_str" indexed="true" stored="false"/>
12+
<field name="_yz_fpn" type="_yz_str" indexed="true" stored="false"/>
13+
<field name="_yz_vtag" type="_yz_str" indexed="true" stored="false"/>
14+
<field name="_yz_rk" type="_yz_str" indexed="true" stored="true"/>
15+
<field name="_yz_rt" type="_yz_str" indexed="true" stored="true"/>
16+
<field name="_yz_rb" type="_yz_str" indexed="true" stored="true"/>
17+
<field name="_yz_err" type="_yz_str" indexed="true" stored="false"/>
18+
</fields>
19+
20+
<uniqueKey>_yz_id</uniqueKey>
21+
22+
<types>
23+
<!-- YZ String: Used for non-analyzed fields -->
24+
<fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" />
25+
26+
<fieldType name="string" class="solr.StrField" sortMissingLast="true" />
27+
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
28+
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
29+
<fieldType name="date" class="solr.DateField" sortMissingLast="true" omitNorms="true"/>
30+
</types>
31+
</schema>

0 commit comments

Comments
 (0)