Skip to content

Implement sorting in place for OrderMap, OrderSet #57

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jan 4, 2018
80 changes: 80 additions & 0 deletions benches/bench.rs
Original file line number Diff line number Diff line change
@@ -5,6 +5,7 @@ extern crate fnv;
#[macro_use]
extern crate lazy_static;

use std::hash::Hash;
use fnv::FnvHasher;
use std::hash::BuildHasherDefault;
type FnvBuilder = BuildHasherDefault<FnvHasher>;
@@ -361,13 +362,16 @@ fn lookup_orderedmap_10_000_noexist(b: &mut Bencher) {
// number of items to look up
const LOOKUP_MAP_SIZE: u32 = 100_000_u32;
const LOOKUP_SAMPLE_SIZE: u32 = 5000;
const SORT_MAP_SIZE: usize = 10_000;


// use lazy_static so that comparison benchmarks use the exact same inputs
lazy_static! {
    // One shared key vector, built lazily on first use, so every benchmark
    // that reads KEYS works from the same input.
    static ref KEYS: Vec<u32> = shuffled_keys(0..LOOKUP_MAP_SIZE);
}

lazy_static! {
static ref HMAP_100K: HashMap<u32, u32> = {
let c = LOOKUP_MAP_SIZE;
@@ -392,6 +396,25 @@ lazy_static! {
};
}

lazy_static! {
    // Sort fixture with u32 keys: the first SORT_MAP_SIZE entries of KEYS,
    // each inserted as both key and value, in KEYS order.
    static ref OMAP_SORT_U32: OrderMap<u32, u32> = {
        let mut unsorted = OrderMap::with_capacity(SORT_MAP_SIZE);
        for k in KEYS[..SORT_MAP_SIZE].iter().cloned() {
            unsorted.insert(k, k);
        }
        unsorted
    };
}
lazy_static! {
    // Sort fixture with String keys: the first SORT_MAP_SIZE entries of KEYS,
    // each rendered as lowercase hex centered in a 16-character field, mapped
    // to an empty String.
    static ref OMAP_SORT_S: OrderMap<String, String> = {
        let mut unsorted = OrderMap::with_capacity(SORT_MAP_SIZE);
        for k in KEYS[..SORT_MAP_SIZE].iter() {
            let text = format!("{:^16x}", k);
            unsorted.insert(text, String::new());
        }
        unsorted
    };
}

#[bench]
fn lookup_hashmap_100_000_multi(b: &mut Bencher) {
let map = &*HMAP_100K;
@@ -643,3 +666,60 @@ fn many_retain_hashmap_100_000(b: &mut Bencher) {
map
});
}


/// Baseline sort used for comparison with the in-place `sort_keys`.
///
/// Drains every key-value pair out of `m` into a temporary vector, sorts
/// that vector by key, and then inserts the pairs back into `m`.
pub fn simple_sort<K: Ord + Hash, V>(m: &mut OrderMap<K, V>) {
    let mut pairs = m.drain(..).collect::<Vec<_>>();
    pairs.sort_by(|a, b| Ord::cmp(&a.0, &b.0));
    m.extend(pairs);
}


#[bench]
fn ordermap_sort_s(b: &mut Bencher) {
    // Private copy of the String-keyed fixture; taken once, outside the
    // measured closure.
    let unsorted = OMAP_SORT_S.clone();

    // Each iteration sorts a fresh clone, so the clone cost is included in
    // the measurement. The sorted map is returned from the closure.
    b.iter(|| {
        let mut scratch = unsorted.clone();
        scratch.sort_keys();
        scratch
    });
}

#[bench]
fn ordermap_simple_sort_s(b: &mut Bencher) {
    // Private copy of the String-keyed fixture; taken once, outside the
    // measured closure.
    let unsorted = OMAP_SORT_S.clone();

    // Each iteration runs the drain/sort/extend baseline on a fresh clone,
    // so the clone cost is included in the measurement.
    b.iter(|| {
        let mut scratch = unsorted.clone();
        simple_sort(&mut scratch);
        scratch
    });
}

#[bench]
fn ordermap_sort_u32(b: &mut Bencher) {
    // Private copy of the u32-keyed fixture; taken once, outside the
    // measured closure.
    let unsorted = OMAP_SORT_U32.clone();

    // Each iteration sorts a fresh clone, so the clone cost is included in
    // the measurement. The sorted map is returned from the closure.
    b.iter(|| {
        let mut scratch = unsorted.clone();
        scratch.sort_keys();
        scratch
    });
}

#[bench]
fn ordermap_simple_sort_u32(b: &mut Bencher) {
    // Private copy of the u32-keyed fixture; taken once, outside the
    // measured closure.
    let unsorted = OMAP_SORT_U32.clone();

    // Each iteration runs the drain/sort/extend baseline on a fresh clone,
    // so the clone cost is included in the measurement.
    b.iter(|| {
        let mut scratch = unsorted.clone();
        simple_sort(&mut scratch);
        scratch
    });
}
7 changes: 1 addition & 6 deletions benches/faststring.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
#![feature(test)]
extern crate test;
extern crate rand;
extern crate fnv;
extern crate lazy_static;

use fnv::FnvHasher;
use std::hash::BuildHasherDefault;
type FnvBuilder = BuildHasherDefault<FnvHasher>;

use test::Bencher;

#[macro_use] extern crate ordermap;
extern crate ordermap;

use ordermap::OrderMap;

56 changes: 56 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -45,6 +45,11 @@ fn hash_elem_using<B: BuildHasher, K: ?Sized + Hash>(build: &B, k: &K) -> HashVa
#[derive(Copy, Debug)]
struct HashValue(usize);

impl HashValue {
#[inline(always)]
fn get(self) -> usize { self.0 }
}

impl Clone for HashValue {
#[inline]
fn clone(&self) -> Self { *self }
@@ -1044,6 +1049,57 @@ impl<K, V, S> OrderMap<K, V, S>
}
}

/// Sort the map's key-value pairs into ascending key order, as defined by
/// the keys' `Ord` implementation.
///
/// This is a thin wrapper around `sort_by`; see it for details.
pub fn sort_keys(&mut self)
    where K: Ord,
{
    self.sort_by(|lhs, _, rhs, _| lhs.cmp(rhs))
}

/// Sort the map’s key-value pairs in place using the comparison
/// function `compare`.
///
/// The comparison function receives two key and value pairs to compare (you
/// can sort by keys or values or their combination as needed).
///
/// Computes in **O(n log n)** time and **O(n)** space. The sort is stable.
pub fn sort_by<F>(&mut self, mut compare: F)
where F: FnMut(&K, &V, &K, &V) -> Ordering,
{
// here we temporarily use the hash field in a bucket to store the old
// index instead.
//
// Save the old hash values in `side_index`.
// Then we can sort `self.entries` in place.
let mut side_index = Vec::from_iter(enumerate(&mut self.entries).map(|(i, elt)| {
replace(&mut elt.hash, HashValue(i)).get()
}));

self.entries.sort_by(move |ei, ej| compare(&ei.key, &ei.value, &ej.key, &ej.value));

// Here we write back the hash values from side_index and fill
// in side_index with a mapping from the old to the new index instead.
for (i, ent) in enumerate(&mut self.entries) {
let old_index = ent.hash.get();
ent.hash = HashValue(replace(&mut side_index[old_index], i));
}

// Apply new index to self.indices
dispatch_32_vs_64!(self.apply_new_index(&side_index));
}

/// Remap the entry index stored in each slot of `self.indices`.
///
/// `new_index[old]` is the index that replaces `old`. Slots for which
/// `resolve` yields nothing are left untouched.
fn apply_new_index<Sz>(&mut self, new_index: &[usize])
    where Sz: Size
{
    for slot in self.indices.iter_mut() {
        match slot.resolve::<Sz>() {
            Some((old, _)) => {
                slot.set_pos::<Sz>(new_index[old]);
            }
            None => {}
        }
    }
}

/// Sort the key-value pairs of the map and return a by value iterator of
/// the key-value pairs with the result.
///
18 changes: 18 additions & 0 deletions src/set.rs
Original file line number Diff line number Diff line change
@@ -342,6 +342,24 @@ impl<T, S> OrderSet<T, S>
self.map.retain(move |x, &mut ()| keep(x))
}

/// Sort the set's values into ascending order, as defined by their `Ord`
/// implementation.
///
/// See `sort_by` for details.
pub fn sort(&mut self)
    where T: Ord,
{
    // Same comparison the underlying map's `sort_keys` uses.
    self.map.sort_by(|lhs, _, rhs, _| lhs.cmp(rhs))
}

/// Sort the set's values in place, ordering them with the comparison
/// function `compare`.
///
/// Computes in **O(n log n)** time and **O(n)** space. The sort is stable.
pub fn sort_by<F>(&mut self, mut compare: F)
    where F: FnMut(&T, &T) -> Ordering,
{
    // The backing map stores `()` as every value, so only the keys are
    // passed on to `compare`.
    self.map.sort_by(move |lhs, &(), rhs, &()| compare(lhs, rhs));
}

/// Sort the values of the set and return a by value iterator of
/// the values with the result.
///
43 changes: 43 additions & 0 deletions tests/quick.rs
Original file line number Diff line number Diff line change
@@ -273,6 +273,49 @@ quickcheck! {
// check the order
itertools::assert_equal(map.keys(), initial_map.keys().filter(|&k| !remove_map.contains_key(k)));
}

fn sort_1(keyvals: Large<Vec<(i8, i8)>>) -> () {
let mut map: OrderMap<_, _> = OrderMap::from_iter(keyvals.to_vec());
let mut answer = keyvals.0;
answer.sort_by_key(|t| t.0);

// reverse dedup: Because OrderMap::from_iter keeps the last value for
// identical keys
answer.reverse();
answer.dedup_by_key(|t| t.0);
answer.reverse();

map.sort_by(|k1, _, k2, _| Ord::cmp(k1, k2));

// check it contains all the values it should
for &(key, val) in &answer {
assert_eq!(map[&key], val);
}

// check the order

let mapv = Vec::from_iter(map);
assert_eq!(answer, mapv);

}

fn sort_2(keyvals: Large<Vec<(i8, i8)>>) -> () {
    // Property: after sorting by value, the map's iteration order is
    // unchanged by a stable sort on the value component.
    let mut by_value: OrderMap<_, _> = keyvals.to_vec().into_iter().collect();
    by_value.sort_by(|_, a, _, b| a.cmp(b));
    assert_sorted_by_key(by_value, |&(_, v)| v);
}
}

/// Assert that the items of `iterable` are already in sorted order with
/// respect to `key`.
///
/// The items are collected, a copy is stably sorted with `sort_by_key`, and
/// the two sequences are compared with `assert_eq!`. Because the sort is
/// stable, items with equal keys keep their relative order and never cause
/// a mismatch on their own.
///
/// Only the extracted key `X` has to be `Ord`. The items themselves only
/// need `PartialEq`, `Clone` and `Debug`, so items that are not totally
/// ordered (for example tuples containing floats) can be checked as well.
///
/// # Panics
///
/// Panics if the input order differs from the key-sorted order.
fn assert_sorted_by_key<I, Key, X>(iterable: I, key: Key)
    where I: IntoIterator,
          I::Item: PartialEq + Clone + Debug,
          Key: Fn(&I::Item) -> X,
          X: Ord,
{
    let input: Vec<I::Item> = iterable.into_iter().collect();
    let mut sorted = input.clone();
    sorted.sort_by_key(key);
    assert_eq!(input, sorted);
}

#[derive(Clone, Debug, Hash, PartialEq, Eq)]