Skip to content

Commit 990d45d

Browse files
insipx, ordian, dvdplm
authored
Add Rocksdb Secondary Instance Api (#384)
* kvdb-rocksdb: update to new set_upper_bound API * kvdb-rocksdb: update rocksdb to crates.io version * kvdb-rocksdb: update the changelog * Fix build? Set VM template. * Fix build? correct image name * Fix build? Maybe it's 2019? * appveyor: try release build * Revert "appveyor: try release build" This reverts commit ace87ee. * checkout rust-rocksdb/rust-rocksdb#412 * revert patch * revert unrelated changes * add open as secondary rocksdb api * Update kvdb-rocksdb/src/lib.rs Co-Authored-By: Andronik Ordian <[email protected]> * add more information to secondary mode comment * add function to catch up a secondary instance with a primary instance * one more doc comment for more clarity * style fixes * Update kvdb-rocksdb/src/lib.rs Co-Authored-By: David <[email protected]> * Update kvdb-rocksdb/src/lib.rs Co-Authored-By: David <[email protected]> * change name of `secondary_mode` option to `secondary` * Update kvdb-rocksdb/src/lib.rs Co-Authored-By: David <[email protected]> * fix some punctuation * specify a different directory for secondary instance to store its logs * Update kvdb-rocksdb/src/lib.rs Co-authored-by: Andronik Ordian <[email protected]> * remove catching up on primary db in test * doc comment fixes expand on what `try_catch_up_with_secondary` does, since it may have some implications on the primary instance of rocksdb according to L503-566 in `db/db_impl/db_impl_secondary.cc` of facebook/rocksdb * remove wrong info about blocking primary instance * more docs for catch-up-with-primary * grammar * make `max_open_files` comment clearer Co-authored-by: Andronik Ordian <[email protected]> Co-authored-by: David Palm <[email protected]>
1 parent 50c3dc2 commit 990d45d

File tree

1 file changed

+138
-15
lines changed

1 file changed

+138
-15
lines changed

kvdb-rocksdb/src/lib.rs

Lines changed: 138 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,17 @@ pub struct DatabaseConfig {
166166
/// It can have a negative performance impact up to 10% according to
167167
/// https://github.com/facebook/rocksdb/wiki/Statistics.
168168
pub enable_statistics: bool,
169+
/// Open the database as a secondary instance.
170+
/// Specify a path for the secondary instance of the database.
171+
/// Secondary instances are read-only and kept updated by tailing the rocksdb MANIFEST.
172+
/// It is up to the user to call `try_catch_up_with_primary()` manually to update the secondary db.
173+
/// Disabled by default.
174+
///
175+
/// `max_open_files` is overridden to always equal `-1`.
176+
/// May have a negative performance impact on the secondary instance
177+
/// if the secondary instance reads and applies state changes before the primary instance compacts them.
178+
/// More info: https://github.com/facebook/rocksdb/wiki/Secondary-instance
179+
pub secondary: Option<String>,
169180
}
170181

171182
impl DatabaseConfig {
@@ -215,6 +226,7 @@ impl Default for DatabaseConfig {
215226
columns: 1,
216227
keep_log_file_num: 1,
217228
enable_statistics: false,
229+
secondary: None,
218230
}
219231
}
220232
}
@@ -305,7 +317,11 @@ fn generate_options(config: &DatabaseConfig) -> Options {
305317
}
306318
opts.set_use_fsync(false);
307319
opts.create_if_missing(true);
308-
opts.set_max_open_files(config.max_open_files);
320+
if config.secondary.is_some() {
321+
opts.set_max_open_files(-1)
322+
} else {
323+
opts.set_max_open_files(config.max_open_files);
324+
}
309325
opts.set_bytes_per_sync(1 * MB as u64);
310326
opts.set_keep_log_file_num(1);
311327
opts.increase_parallelism(cmp::max(1, num_cpus::get() as i32 / 2));
@@ -364,12 +380,38 @@ impl Database {
364380
}
365381

366382
let column_names: Vec<_> = (0..config.columns).map(|c| format!("col{}", c)).collect();
367-
368383
let write_opts = WriteOptions::default();
369384
let read_opts = generate_read_options();
370385

386+
let db = if let Some(secondary_path) = &config.secondary {
387+
Self::open_secondary(&opts, path, secondary_path.as_str(), column_names.as_slice())?
388+
} else {
389+
let column_names: Vec<&str> = column_names.iter().map(|s| s.as_str()).collect();
390+
Self::open_primary(&opts, path, config, column_names.as_slice(), &block_opts)?
391+
};
392+
393+
Ok(Database {
394+
db: RwLock::new(Some(DBAndColumns { db, column_names })),
395+
config: config.clone(),
396+
path: path.to_owned(),
397+
opts,
398+
read_opts,
399+
write_opts,
400+
block_opts,
401+
stats: stats::RunningDbStats::new(),
402+
})
403+
}
404+
405+
/// Internal api to open a database in primary mode.
406+
fn open_primary(
407+
opts: &Options,
408+
path: &str,
409+
config: &DatabaseConfig,
410+
column_names: &[&str],
411+
block_opts: &BlockBasedOptions,
412+
) -> io::Result<rocksdb::DB> {
371413
let cf_descriptors: Vec<_> = (0..config.columns)
372-
.map(|i| ColumnFamilyDescriptor::new(&column_names[i as usize], config.column_config(&block_opts, i)))
414+
.map(|i| ColumnFamilyDescriptor::new(column_names[i as usize], config.column_config(&block_opts, i)))
373415
.collect();
374416

375417
let db = match DB::open_cf_descriptors(&opts, path, cf_descriptors) {
@@ -390,31 +432,42 @@ impl Database {
390432
ok => ok,
391433
};
392434

393-
let db = match db {
435+
Ok(match db {
394436
Ok(db) => db,
395437
Err(ref s) if is_corrupted(s) => {
396438
warn!("DB corrupted: {}, attempting repair", s);
397439
DB::repair(&opts, path).map_err(other_io_err)?;
398440

399441
let cf_descriptors: Vec<_> = (0..config.columns)
400442
.map(|i| {
401-
ColumnFamilyDescriptor::new(&column_names[i as usize], config.column_config(&block_opts, i))
443+
ColumnFamilyDescriptor::new(column_names[i as usize], config.column_config(&block_opts, i))
402444
})
403445
.collect();
404446

405447
DB::open_cf_descriptors(&opts, path, cf_descriptors).map_err(other_io_err)?
406448
}
407449
Err(s) => return Err(other_io_err(s)),
408-
};
409-
Ok(Database {
410-
db: RwLock::new(Some(DBAndColumns { db, column_names })),
411-
config: config.clone(),
412-
path: path.to_owned(),
413-
opts,
414-
read_opts,
415-
write_opts,
416-
block_opts,
417-
stats: stats::RunningDbStats::new(),
450+
})
451+
}
452+
453+
/// Internal api to open a database in secondary mode.
454+
/// Secondary database needs a separate path to store its own logs.
455+
fn open_secondary(
456+
opts: &Options,
457+
path: &str,
458+
secondary_path: &str,
459+
column_names: &[String],
460+
) -> io::Result<rocksdb::DB> {
461+
let db = DB::open_cf_as_secondary(&opts, path, secondary_path, column_names);
462+
463+
Ok(match db {
464+
Ok(db) => db,
465+
Err(ref s) if is_corrupted(s) => {
466+
warn!("DB corrupted: {}, attempting repair", s);
467+
DB::repair(&opts, path).map_err(other_io_err)?;
468+
DB::open_cf_as_secondary(&opts, path, secondary_path, column_names).map_err(other_io_err)?
469+
}
470+
Err(s) => return Err(other_io_err(s)),
418471
})
419472
}
420473

@@ -635,6 +688,33 @@ impl Database {
635688
HashMap::new()
636689
}
637690
}
691+
692+
/// Try to catch up a secondary instance with
693+
/// the primary by reading as much from the logs as possible.
694+
///
695+
/// Guaranteed to have changes up to the time that `try_catch_up_with_primary` is called
696+
/// if it finishes successfully.
697+
///
698+
/// Blocks until the MANIFEST file and any state changes in the corresponding Write-Ahead-Logs
699+
/// are applied to the secondary instance. If the manifest files are very large
700+
/// this method could take a long time.
701+
///
702+
/// If Write-Ahead-Logs have been purged by the primary instance before the secondary
703+
/// is able to open them, the secondary will not be caught up
704+
/// until this function is called again and new Write-Ahead-Logs are identified.
705+
///
706+
/// If called while the primary is writing, the catch-up may fail.
707+
///
708+
/// If the secondary is unable to catch up because of missing logs,
709+
/// this method fails silently and no error is returned.
710+
///
711+
/// Calling this as primary will return an error.
712+
pub fn try_catch_up_with_primary(&self) -> io::Result<()> {
713+
match self.db.read().as_ref() {
714+
Some(DBAndColumns { db, .. }) => db.try_catch_up_with_primary().map_err(other_io_err),
715+
None => Ok(()),
716+
}
717+
}
638718
}
639719

640720
// duplicate declaration of methods here to avoid trait import in certain existing cases
@@ -755,6 +835,48 @@ mod tests {
755835
st::test_io_stats(&db)
756836
}
757837

838+
#[test]
839+
fn secondary_db_get() -> io::Result<()> {
840+
let primary = TempDir::new("")?;
841+
let config = DatabaseConfig::with_columns(1);
842+
let db = Database::open(&config, primary.path().to_str().expect("tempdir path is valid unicode"))?;
843+
844+
let key1 = b"key1";
845+
let mut transaction = db.transaction();
846+
transaction.put(0, key1, b"horse");
847+
db.write(transaction)?;
848+
849+
let config = DatabaseConfig {
850+
secondary: TempDir::new("")?.path().to_str().map(|s| s.to_string()),
851+
..DatabaseConfig::with_columns(1)
852+
};
853+
let second_db = Database::open(&config, primary.path().to_str().expect("tempdir path is valid unicode"))?;
854+
assert_eq!(&*second_db.get(0, key1)?.unwrap(), b"horse");
855+
Ok(())
856+
}
857+
858+
#[test]
859+
fn secondary_db_catch_up() -> io::Result<()> {
860+
let primary = TempDir::new("")?;
861+
let config = DatabaseConfig::with_columns(1);
862+
let db = Database::open(&config, primary.path().to_str().expect("tempdir path is valid unicode"))?;
863+
864+
let config = DatabaseConfig {
865+
secondary: TempDir::new("")?.path().to_str().map(|s| s.to_string()),
866+
..DatabaseConfig::with_columns(1)
867+
};
868+
let second_db = Database::open(&config, primary.path().to_str().expect("tempdir path is valid unicode"))?;
869+
870+
let mut transaction = db.transaction();
871+
transaction.put(0, b"key1", b"mule");
872+
transaction.put(0, b"key2", b"cat");
873+
db.write(transaction)?;
874+
875+
second_db.try_catch_up_with_primary()?;
876+
assert_eq!(&*second_db.get(0, b"key2")?.unwrap(), b"cat");
877+
Ok(())
878+
}
879+
758880
#[test]
759881
fn mem_tables_size() {
760882
let tempdir = TempDir::new("").unwrap();
@@ -766,6 +888,7 @@ mod tests {
766888
columns: 11,
767889
keep_log_file_num: 1,
768890
enable_statistics: false,
891+
secondary: None,
769892
};
770893

771894
let db = Database::open(&config, tempdir.path().to_str().unwrap()).unwrap();

0 commit comments

Comments
 (0)