Skip to content

Commit b440e65

Browse files
authored
SCALA-555: add RandomFixedSizeSample (#801)
1 parent 59ebd15 commit b440e65

File tree

2 files changed

+120
-0
lines changed

2 files changed

+120
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
package com.baeldung.scala.randomfixedsizesample
2+
3+
import scala.annotation.tailrec
4+
import scala.util.Random
5+
6+
object RandomFixedSizeSample extends App {

  /** 1. Using recursion and Random.nextInt.
    *
    * Repeatedly removes the element at a uniformly chosen index of the
    * remaining list and prepends it to the result, until `size` elements
    * have been drawn. No position of `list` is drawn twice.
    *
    * Every draw splits and re-joins the remaining list, so the cost grows
    * with both the list length and `size`.
    *
    * @param list the elements to sample from
    * @param size the number of elements to draw
    * @tparam T the element type
    * @return an empty list when `size` is zero or negative; `list` itself
    *         (in its original order) when `size` exceeds the list length;
    *         otherwise `size` elements of `list`, most recently drawn first
    */
  def getRandomSampleRec[T](list: List[T], size: Int): List[T] = {
    // Lengths are threaded through the recursion because List.size walks
    // the whole list on every call.
    @tailrec
    def rec(xs: List[T], xsSize: Int, acc: List[T], accSize: Int): List[T] =
      if (accSize == size) {
        acc
      } else {
        val index = Random.nextInt(xsSize)
        val (left, right) = xs.splitAt(index)
        // index < xsSize, so `right` always holds at least the drawn element.
        rec(left ::: right.tail, xsSize - 1, right.head :: acc, accSize + 1)
      }

    if (size <= 0) {
      // A negative size previously drained the list and then failed inside
      // Random.nextInt(0); treat it like an empty request instead.
      List.empty[T]
    } else {
      val listSize = list.size
      if (size > listSize) list
      else rec(list, listSize, List.empty[T], 0)
    }
  }

  /** 2. Using zip with random numbers, sort and take.
    *
    * Tags every element with a random `Int` key, sorts by that key and keeps
    * the first `size` elements.
    *
    * @param list the elements to sample from
    * @param size the maximum number of elements to keep
    * @tparam T the element type
    * @return at most `size` elements of `list`, ordered by their random keys
    */
  def getRandomSampleZip[T](list: List[T], size: Int): List[T] =
    list
      .map(elem => (Random.nextInt(), elem))
      .sortBy(_._1)
      .map(_._2)
      .take(size)

  /** 3. Using shuffle and take.
    *
    * Shuffles the whole list and keeps the first `size` elements.
    *
    * @param list the elements to sample from
    * @param size the maximum number of elements to keep
    * @tparam T the element type
    * @return at most `size` elements of the shuffled `list`
    */
  def getRandomSampleShuffle[T](list: List[T], size: Int): List[T] =
    Random.shuffle(list).take(size)

}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
package com.baeldung.scala.randomfixedsizesample
2+
3+
import org.scalatest.wordspec.AnyWordSpec
4+
import org.scalatest.matchers.should.Matchers
5+
6+
import scala.annotation.tailrec;
7+
8+
/** Checks the three sampling strategies of `RandomFixedSizeSample`: that each
  * yields a sample of the requested size with distinct elements taken from
  * the source list, and that their relative speed is shuffle < zip < recursion.
  */
class RandomFixedSizeSampleSpec extends AnyWordSpec with Matchers {

  "RandomFixedSizeSample" should {
    "create a random sample out of the initial List" in {
      val list = List.range(0, 100)
      val sampleSize = 10
      val samples = List(
        "getRandomSampleRec" ->
          RandomFixedSizeSample.getRandomSampleRec(list, sampleSize),
        "getRandomSampleZip" ->
          RandomFixedSizeSample.getRandomSampleZip(list, sampleSize),
        "getRandomSampleShuffle" ->
          RandomFixedSizeSample.getRandomSampleShuffle(list, sampleSize)
      )

      samples.foreach { case (name, sample) =>
        // The clue names the implementation that broke an expectation.
        withClue(s"$name: ") {
          sample.size shouldBe sampleSize
          // The source has no duplicates, so neither may the sample.
          sample.toSet.size shouldBe sample.size
          sample.toSet.subsetOf(list.toSet) shouldBe true
          // A sorted 10-element sample is possible but vanishingly unlikely.
          isSorted(sample) shouldBe false
        }
      }
    }
    "ensure getRandomSampleShuffle is the most performant, then goes getRandomSampleZip and then getRandomSampleRec" in {
      val list = List.range(0, 10_000)
      val sampleSize = 100
      val warmUpRuns = 5
      val measuredRuns = 10

      // A single cold measurement mostly reflects JIT warm-up and the order
      // in which the implementations happen to run. Warm each one up first,
      // then keep the fastest of several timed runs to damp GC/scheduler
      // noise. This is still a wall-clock comparison, not a rigorous benchmark.
      def bestTimeNanos(sample: () => List[Int]): Long = {
        (1 to warmUpRuns).foreach(_ => sample())
        (1 to measuredRuns).map { _ =>
          val start = System.nanoTime()
          sample()
          System.nanoTime() - start
        }.min
      }

      val recTime = bestTimeNanos(() =>
        RandomFixedSizeSample.getRandomSampleRec(list, sampleSize)
      )
      val zipTime = bestTimeNanos(() =>
        RandomFixedSizeSample.getRandomSampleZip(list, sampleSize)
      )
      val shuffleTime = bestTimeNanos(() =>
        RandomFixedSizeSample.getRandomSampleShuffle(list, sampleSize)
      )

      recTime should be > zipTime
      zipTime should be > shuffleTime
    }

  }

  /** Tells whether `list` is in non-descending order under `o`.
    *
    * @param list the list to inspect
    * @param o    the ordering used to compare neighbouring elements
    * @tparam T the element type
    * @return `true` for empty and single-element lists, and whenever no
    *         element is strictly less than its predecessor
    */
  def isSorted[T](list: List[T])(implicit o: Ordering[T]): Boolean = {
    @tailrec
    def loop(rest: List[T]): Boolean = rest match {
      case first :: (tail @ (second :: _)) =>
        !o.lt(second, first) && loop(tail)
      case _ => true
    }

    loop(list)
  }

}

0 commit comments

Comments
 (0)