Skip to content

Commit eef7899

Browse files
committed
Merge from cloudant/clouseau
1 parent 4dbd4ba commit eef7899

File tree

7 files changed

+241
-47
lines changed

7 files changed

+241
-47
lines changed

README.md

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,19 @@
11
# clouseau
22

3-
Clouseau uses Scalang to expose Lucene functionality via erlang-like nodes.
3+
Expose Lucene features to Erlang over RPC.
4+
5+
## Configuration options
6+
This guide explains the various clouseau configuration options available, and how to use them to tune clouseau performance and scalability. There are two categories of clouseau options: the first category is about tuning the JVM (e.g. Xmx), and the second category covers the options that go into clouseau.ini.
7+
8+
Clouseau configuration options (as determined by the relevant role in chef-repo) are stored in `/opt/clouseau/etc/clouseau.ini`, while the JVM tuning options go into the command used to start and stop clouseau.
9+
10+
Example clouseau configuration options in clouseau.ini:
11+
```
12+
[clouseau]
13+
max_indexes_open=15000
14+
close_if_idle=true
15+
idle_check_interval_secs=600
16+
```
417

518
## Running a local dev cluster
619

pom.xml

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ the License.
108108
<dependency>
109109
<groupId>com.boundary</groupId>
110110
<artifactId>scalang-scala_2.9.1</artifactId>
111-
<version>0.28-cloudant2</version>
111+
<version>0.28-cloudant3</version>
112112
</dependency>
113113
<dependency>
114114
<groupId>org.slf4j</groupId>
@@ -156,17 +156,13 @@ the License.
156156
<version>${scala.plugin.version}</version>
157157
<configuration>
158158
<launchers>
159-
<launcher>
160-
<id>clouseau</id>
161-
<mainClass>com.cloudant.clouseau.Main</mainClass>
162-
</launcher>
163159
<launcher>
164160
<id>clouseau1</id>
165161
<mainClass>com.cloudant.clouseau.Main</mainClass>
166162
<jvmArgs>
167163
<jvmArg>[email protected]</jvmArg>
168164
<jvmArg>-Dclouseau.cookie=monster</jvmArg>
169-
<jvmArg>-Dclouseau.dir=target/clouseau1</jvmArg>
165+
<jvmArg>-Dclouseau.dir=${basedir}/target/clouseau1</jvmArg>
170166
</jvmArgs>
171167
</launcher>
172168
<launcher>
@@ -175,7 +171,7 @@ the License.
175171
<jvmArgs>
176172
<jvmArg>[email protected]</jvmArg>
177173
<jvmArg>-Dclouseau.cookie=monster</jvmArg>
178-
<jvmArg>-Dclouseau.dir=target/clouseau2</jvmArg>
174+
<jvmArg>-Dclouseau.dir=${basedir}/target/clouseau2</jvmArg>
179175
</jvmArgs>
180176
</launcher>
181177
<launcher>
@@ -184,7 +180,7 @@ the License.
184180
<jvmArgs>
185181
<jvmArg>[email protected]</jvmArg>
186182
<jvmArg>-Dclouseau.cookie=monster</jvmArg>
187-
<jvmArg>-Dclouseau.dir=target/clouseau3</jvmArg>
183+
<jvmArg>-Dclouseau.dir=${basedir}/target/clouseau3</jvmArg>
188184
</jvmArgs>
189185
</launcher>
190186
</launchers>
@@ -363,4 +359,25 @@ the License.
363359
</plugins>
364360
</reporting>
365361

362+
<distributionManagement>
363+
<repository>
364+
<id>maven.cloudant.com</id>
365+
<name>maven.cloudant.com-releases</name>
366+
<url>scpexe://maven.cloudant.com/var/www/domains/cloudant.com/maven/htdocs/repo/</url>
367+
</repository>
368+
<snapshotRepository>
369+
<id>maven.cloudant.com</id>
370+
<name>maven.cloudant.com-snapshots</name>
371+
<url>scpexe://maven.cloudant.com/var/www/domains/cloudant.com/maven/htdocs/repo/</url>
372+
</snapshotRepository>
373+
<site>
374+
<id>website</id>
375+
<url>scpexe://maven.cloudant.com/var/www/domains/cloudant.com/maven/htdocs/site/</url>
376+
</site>
377+
</distributionManagement>
378+
379+
<scm>
380+
<connection>scm:git:https://github.com/cloudant-labs/clouseau.git</connection>
381+
</scm>
382+
366383
</project>

src/main/scala/com/cloudant/clouseau/ClouseauTypeFactory.scala

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ case class OpenIndexMsg(peer: Pid, path: String, options: Any)
3232
case class CleanupPathMsg(path: String)
3333
case class RenamePathMsg(dbName: String)
3434
case class CleanupDbMsg(dbName: String, activeSigs: List[String])
35+
case class DiskSizeMsg(path: String)
3536

3637
case class Group1Msg(query: String, field: String, refresh: Boolean, groupSort: Any, groupOffset: Int,
3738
groupLimit: Int)
@@ -88,6 +89,8 @@ object ClouseauTypeFactory extends TypeFactory {
8889
Some(UpdateDocMsg(id, doc))
8990
case ('delete, 2) =>
9091
Some(DeleteDocMsg(reader.readAs[String]))
92+
case ('disk_size, 2) =>
93+
Some(DiskSizeMsg(reader.readAs[String]))
9194
case ('commit, 2) =>
9295
Some(CommitMsg(toLong(reader.readTerm)))
9396
case ('set_update_seq, 2) =>

src/main/scala/com/cloudant/clouseau/IndexManagerService.scala

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,8 @@ class IndexManagerService(ctx: ServiceContext[ConfigurationArgs]) extends Servic
117117
pid ! 'delete
118118
'ok
119119
}
120+
case DiskSizeMsg(path: String) =>
121+
getDiskSize(path)
120122
case 'close_lru =>
121123
lru.close()
122124
'ok
@@ -146,6 +148,18 @@ class IndexManagerService(ctx: ServiceContext[ConfigurationArgs]) extends Servic
146148
'ignored
147149
}
148150

151+
/**
 * Reports the on-disk size of the index directory found at `path`.
 *
 * `path` is resolved relative to `rootDir`. The size is the sum of the
 * lengths of the entries listed directly inside that directory; nested
 * directories are not descended into.
 *
 * @param path index location, relative to `rootDir`
 * @return `('ok, List(('disk_size, bytes)))` when the directory can be
 *         listed, `('error, 'not_a_directory)` otherwise
 */
private def getDiskSize(path: String) = {
  val indexDir = new File(rootDir, path)
  // File.list() hands back null when the entries cannot be listed;
  // Option(...) maps that null to None.
  Option(indexDir.list()) match {
    case Some(names) =>
      val total = names.map(name => new File(indexDir, name).length()).sum
      ('ok, List(('disk_size, total)))
    case None =>
      ('error, 'not_a_directory)
  }
}
162+
149163
private def replyAll(path: String, msg: Any) {
150164
waiters.remove(path) match {
151165
case Some(list) =>

src/main/scala/com/cloudant/clouseau/IndexService.scala

Lines changed: 74 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ import scalang.Pid
5656
import scalang.Reference
5757
import com.spatial4j.core.context.SpatialContext
5858
import com.spatial4j.core.distance.DistanceUtils
59+
import java.util.HashSet
5960

6061
case class IndexServiceArgs(config: Configuration, name: String, queryParser: QueryParser, writer: IndexWriter)
6162
case class HighlightParameters(highlighter: Highlighter, highlightFields: List[String], highlightNumber: Int, analyzers: List[Analyzer])
@@ -71,6 +72,7 @@ class IndexService(ctx: ServiceContext[IndexServiceArgs]) extends Service(ctx) w
7172
var pendingSeq = updateSeq
7273
var committing = false
7374
var forceRefresh = false
75+
var idle = true
7476

7577
val searchTimer = metrics.timer("searches")
7678
val updateTimer = metrics.timer("updates")
@@ -80,45 +82,57 @@ class IndexService(ctx: ServiceContext[IndexServiceArgs]) extends Service(ctx) w
8082
// Start committer heartbeat
8183
val commitInterval = ctx.args.config.getInt("commit_interval_secs", 30)
8284
sendEvery(self, 'maybe_commit, commitInterval * 1000)
85+
val countFieldsEnabled = ctx.args.config.getBoolean("clouseau.count_fields", false)
86+
send(self, 'count_fields)
87+
88+
// Check if the index is idle and optionally close it if there is no activity between
89+
// two consecutive idle status checks.
90+
val closeIfIdleEnabled = ctx.args.config.getBoolean("clouseau.close_if_idle", false)
91+
val idleTimeout = ctx.args.config.getInt("clouseau.idle_check_interval_secs", 300)
92+
if (closeIfIdleEnabled) {
93+
sendEvery(self, 'close_if_idle, idleTimeout * 1000)
94+
}
8395

8496
debug("Opened at update_seq %d".format(updateSeq))
8597

8698
override def handleCall(tag: (Pid, Reference), msg: Any): Any = {
99+
idle = false
87100
send('main, ('touch_lru, ctx.args.name))
88-
89-
msg match {
90-
case request: SearchRequest =>
91-
search(request)
92-
case Group1Msg(query: String, field: String, refresh: Boolean, groupSort: Any, groupOffset: Int,
93-
groupLimit: Int) =>
94-
group1(query, field, refresh, groupSort, groupOffset, groupLimit)
95-
case request: Group2Msg =>
96-
group2(request)
97-
case 'get_update_seq =>
98-
('ok, updateSeq)
99-
case UpdateDocMsg(id: String, doc: Document) =>
100-
debug("Updating %s".format(id))
101-
updateTimer.time {
102-
ctx.args.writer.updateDocument(new Term("_id", id), doc)
103-
}
104-
'ok
105-
case DeleteDocMsg(id: String) =>
106-
debug("Deleting %s".format(id))
107-
deleteTimer.time {
108-
ctx.args.writer.deleteDocuments(new Term("_id", id))
109-
}
110-
'ok
111-
case CommitMsg(commitSeq: Long) => // deprecated
112-
pendingSeq = commitSeq
113-
debug("Pending sequence is now %d".format(commitSeq))
114-
'ok
115-
case SetUpdateSeqMsg(newSeq: Long) =>
116-
pendingSeq = newSeq
117-
debug("Pending sequence is now %d".format(newSeq))
118-
'ok
119-
case 'info =>
120-
('ok, getInfo)
121-
}
101+
internalHandleCall(tag, msg)
102+
}
103+
104+
/**
 * Dispatches a synchronous call to the matching index operation.
 *
 * Searches and grouping requests are forwarded to `search`, `group1` and
 * `group2`; document updates and deletes go to the Lucene writer inside
 * their respective timers; sequence messages only record the pending
 * sequence. A message that matches no case raises a `MatchError`.
 *
 * @param tag caller pid and call reference (not inspected here)
 * @param msg decoded request
 * @return the reply term for the caller
 */
def internalHandleCall(tag: (Pid, Reference), msg: Any): Any = {
  msg match {
    // --- queries -----------------------------------------------------------
    case searchRequest: SearchRequest =>
      search(searchRequest)
    case Group1Msg(q: String, groupField: String, doRefresh: Boolean, sort: Any, offset: Int,
      limit: Int) =>
      group1(q, groupField, doRefresh, sort, offset, limit)
    case groupRequest: Group2Msg =>
      group2(groupRequest)
    case 'get_update_seq =>
      ('ok, updateSeq)

    // --- writes ------------------------------------------------------------
    case UpdateDocMsg(docId: String, luceneDoc: Document) =>
      debug("Updating %s".format(docId))
      updateTimer.time {
        ctx.args.writer.updateDocument(new Term("_id", docId), luceneDoc)
      }
      'ok
    case DeleteDocMsg(docId: String) =>
      debug("Deleting %s".format(docId))
      deleteTimer.time {
        ctx.args.writer.deleteDocuments(new Term("_id", docId))
      }
      'ok

    // --- sequence bookkeeping ---------------------------------------------
    case CommitMsg(seq: Long) => // deprecated
      pendingSeq = seq
      debug("Pending sequence is now %d".format(seq))
      'ok
    case SetUpdateSeqMsg(seq: Long) =>
      pendingSeq = seq
      debug("Pending sequence is now %d".format(seq))
      'ok

    // --- introspection -----------------------------------------------------
    case 'info =>
      ('ok, getInfo)
  }
}
123137

124138
override def handleCast(msg: Any) = msg match {
@@ -139,6 +153,13 @@ class IndexService(ctx: ServiceContext[IndexServiceArgs]) extends Service(ctx) w
139153
exit(msg)
140154
case ('close, reason) =>
141155
exit(reason)
156+
case ('close_if_idle) =>
157+
if (idle) {
158+
exit("Idle Timeout")
159+
}
160+
idle = true
161+
case 'count_fields =>
162+
countFields
142163
case 'delete =>
143164
val dir = ctx.args.writer.getDirectory
144165
ctx.args.writer.close()
@@ -157,6 +178,25 @@ class IndexService(ctx: ServiceContext[IndexServiceArgs]) extends Service(ctx) w
157178
committing = false
158179
}
159180

181+
/**
 * Logs a warning when the index holds more distinct field names than the
 * configured threshold.
 *
 * Does nothing unless `countFieldsEnabled` is true. The threshold is read
 * from `clouseau.field_count_warn_threshold` (default 5000). Field names
 * are collected from every leaf of `reader` into a set, so a name that
 * appears in several leaves is counted once.
 */
def countFields() {
  if (countFieldsEnabled) {
    val warningThreshold = ctx.args.config.
      getInt("clouseau.field_count_warn_threshold", 5000)
    val fields = new HashSet[String]()
    val leaves = reader.leaves().iterator()
    // Both loops stop as soon as the threshold has been exceeded: the
    // outcome is already decided and the remaining fields need not be read.
    while (leaves.hasNext() && fields.size <= warningThreshold) {
      val fieldInfoIter = leaves.next().reader().getFieldInfos().iterator()
      while (fieldInfoIter.hasNext() && fields.size <= warningThreshold) {
        fields.add(fieldInfoIter.next().name)
      }
    }
    if (fields.size > warningThreshold) {
      warn("Index has more than %d fields, ".format(warningThreshold) +
        "too many fields will lead to heap exhaustion")
    }
  }
}
199+
160200
override def exit(msg: Any) {
161201
debug("Closed with reason: %.1000s".format(msg))
162202
try {

src/main/scala/com/cloudant/clouseau/Main.scala

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@ object Main extends App {
4242

4343
val name = config.getString("clouseau.name", "[email protected]")
4444
val cookie = config.getString("clouseau.cookie", "monster")
45+
// Announce the idle-close settings at startup when the feature is switched on.
// Defaults: disabled, with a 300 second check interval.
val closeIfIdleEnabled = config.getBoolean("clouseau.close_if_idle", false)
val idleTimeout = config.getInt("clouseau.idle_check_interval_secs", 300)
if (closeIfIdleEnabled) {
  logger.info("Idle timeout is enabled and will check the indexer idle status every %d seconds".format(idleTimeout))
}
4550
val nodeconfig = NodeConfig(
4651
typeFactory = ClouseauTypeFactory,
4752
typeEncoder = ClouseauTypeEncoder,

0 commit comments

Comments
 (0)