Skip to content

Commit 1065565

Browse files
didalgolab authored and Mariusz Bernacki committed
Refactor Dataset class hierarchy
1 parent 637c86d commit 1065565

File tree

89 files changed

+1550
-3426
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

89 files changed

+1550
-3426
lines changed

chartsy-benchmarking/src/main/java/one/chartsy/benchmarking/ByteBufferMutableHLCDatasetBenchmarkTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* SPDX-License-Identifier: Apache-2.0 */
33
package one.chartsy.benchmarking;
44

5-
import one.chartsy.data.Dataset;
5+
import one.chartsy.base.Dataset;
66
import one.chartsy.data.packed.ByteBufferMutableHLCDataset;
77
import org.openjdk.jmh.annotations.*;
88
import org.openjdk.jmh.runner.Runner;

chartsy-core-ml/src/main/java/one/chartsy/core/ml/Classifiers.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
*/
55
package one.chartsy.core.ml;
66

7-
import one.chartsy.data.Dataset;
8-
import one.chartsy.data.DoubleDataset;
7+
import one.chartsy.base.Dataset;
8+
import one.chartsy.base.DoubleDataset;
99
import one.chartsy.smile.classification.KNN;
1010
import one.chartsy.smile.math.distance.Distance;
1111
import one.chartsy.smile.math.distance.EuclideanDistance;

chartsy-core-ml/src/main/java/one/chartsy/core/ml/Predictors.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
*/
55
package one.chartsy.core.ml;
66

7-
import one.chartsy.data.Dataset;
8-
import one.chartsy.data.DoubleDataset;
7+
import one.chartsy.base.Dataset;
8+
import one.chartsy.base.DoubleDataset;
99
import one.chartsy.util.Pair;
1010

1111
public final class Predictors {

chartsy-core-ml/src/main/java/one/chartsy/core/ml/SmilePredictors.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
* SPDX-License-Identifier: Apache-2.0 */
33
package one.chartsy.core.ml;
44

5-
import one.chartsy.data.Dataset;
6-
import one.chartsy.data.DoubleDataset;
5+
import one.chartsy.base.Dataset;
6+
import one.chartsy.base.DoubleDataset;
77
import one.chartsy.smile.regression.OLS;
88
import one.chartsy.util.Pair;
99

chartsy-core/src/main/java/one/chartsy/base/AbstractRingBuffer.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,10 @@ protected final int arrayIndex(int offset) {
7979
return ((int)nextWrite - offset - 1) & mask;
8080
}
8181

82-
protected final void checkOffset(int offset) {
83-
if (offset >= capacity())
84-
throw new BufferTooSmallException("RingBuffer too small", capacity(), offset + 1);
85-
if (offset < 0 || offset >= length())
86-
throw new IndexOutOfBoundsException(offset);
82+
protected final void checkIndex(int index) {
83+
if (index >= capacity())
84+
throw new BufferTooSmallException("RingBuffer too small", capacity(), index + 1);
85+
if (index < 0 || index >= length())
86+
throw new IndexOutOfBoundsException(index);
8787
}
8888
}

chartsy-core/src/main/java/one/chartsy/base/BufferTooSmallException.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
*/
55
package one.chartsy.base;
66

7-
public class BufferTooSmallException extends RuntimeException {
7+
public class BufferTooSmallException extends IndexOutOfBoundsException {
88
private final int actualCapacity;
99
private final int requiredCapacity;
1010

chartsy-core/src/main/java/one/chartsy/base/Dataset.java

Lines changed: 91 additions & 182 deletions
Original file line numberDiff line numberDiff line change
@@ -5,26 +5,36 @@
55
package one.chartsy.base;
66

77
import one.chartsy.base.dataset.AbstractDataset;
8+
import one.chartsy.base.dataset.AbstractDoubleDataset;
9+
import one.chartsy.base.dataset.AbstractIntDataset;
10+
import one.chartsy.base.dataset.AbstractLongDataset;
811
import one.chartsy.base.dataset.ImmutableDataset;
12+
import one.chartsy.util.Pair;
913

1014
import java.util.AbstractList;
1115
import java.util.Iterator;
1216
import java.util.List;
1317
import java.util.Objects;
1418
import java.util.Spliterator;
1519
import java.util.function.Function;
16-
import java.util.stream.DoubleStream;
17-
import java.util.stream.IntStream;
18-
import java.util.stream.LongStream;
20+
import java.util.function.ToDoubleFunction;
21+
import java.util.function.ToIntFunction;
22+
import java.util.function.ToLongFunction;
1923
import java.util.stream.Stream;
20-
import java.util.stream.StreamSupport;
2124

25+
/**
26+
* An ordered sequence of arbitrary-type data elements.
27+
*
28+
* @author Mariusz Bernacki
29+
*
30+
* @param <E> the type of elements stored in this dataset
31+
*/
2232
public interface Dataset<E> extends SequenceAlike<E, Dataset<E>> {
2333

2434
/**
2535
* Returns the element at the specified position in the dataset. Depending
2636
* on the characteristic of the dataset, the index-ordering may or may not be
27-
* an iterating order.
37+
* an iterating order (see {@link #getOrder()}).
2838
*
2939
* @param index the index of the element to return
3040
* @return the element at the specified position
@@ -47,111 +57,100 @@ default List<E> values() {
4757
return new Values<>(this);
4858
}
4959

50-
default Dataset<E> ref(int n) {
51-
if (n == 0)
60+
default Dataset<E> drop(int maxCount) {
61+
if (maxCount == 0)
5262
return this;
53-
if (n > 0)
54-
throw new IllegalArgumentException("Periods `n` (" + n + ") cannot be positive");
55-
56-
return new AbstractDataset.TransformedDataset<>(this) {
57-
@Override
58-
public E get(int index) {
59-
return dataset.get(index - n);
60-
}
61-
62-
@Override
63-
public int length() {
64-
return Math.max(0, dataset.length() + n);
65-
}
66-
67-
@Override
68-
public Stream<E> stream() {
69-
return getOrder().shift(-n, dataset.stream(), dataset);
70-
}
71-
};
72-
}
63+
if (maxCount < 0)
64+
throw new IllegalArgumentException("Argument `maxCount` (" + maxCount + ") cannot be negative");
7365

74-
default Dataset<E> take(int count) {
75-
if (count <= 0)
76-
throw new IllegalArgumentException("The `count` argument must be positive");
77-
78-
return new AbstractDataset.TransformedDataset<>(this) {
79-
@Override
80-
public int length() {
81-
return Math.min(dataset.length(), count);
82-
}
83-
84-
@Override
85-
public E get(int index) {
86-
return dataset.get(Objects.checkIndex(index, count));
87-
}
88-
89-
@Override
90-
public Stream<E> stream() {
91-
return getOrder().take(count, dataset.stream(), dataset);
92-
}
93-
};
66+
return AbstractDataset.from(this, dataset -> Math.max(0, dataset.length() - maxCount),
67+
(dataset, index) -> dataset.get(index + maxCount),
68+
dataset -> dataset.getOrder().drop(maxCount, dataset.stream(), dataset));
9469
}
9570

96-
default Dataset<E> take(int maxCount, int fromIndex) {
71+
default Dataset<E> take(int maxCount) {
9772
if (maxCount <= 0)
98-
throw new IllegalArgumentException("The `maxCount` (" + maxCount + ") argument must be positive");
73+
throw new IllegalArgumentException("The `maxCount` argument must be positive");
74+
75+
return AbstractDataset.from(this, dataset -> Math.min(dataset.length(), maxCount),
76+
(dataset, index) -> dataset.get(Objects.checkIndex(index, maxCount)),
77+
dataset -> dataset.getOrder().take(maxCount, dataset.stream(), dataset));
78+
}
79+
80+
default Dataset<E> takeExact(int count) {
81+
if (count > length())
82+
throw new IllegalArgumentException("The `takeExact` end index cannot exceed dataset length " + length());
83+
84+
return take(count).toImmutable();
85+
}
86+
87+
default Dataset<E> dropTake(int fromIndex, int maxCount) {
9988
if (fromIndex < 0)
10089
throw new IllegalArgumentException("The `fromIndex` (" + fromIndex + ") argument must be non-negative");
90+
if (maxCount <= 0)
91+
throw new IllegalArgumentException("The `maxCount` (" + maxCount + ") argument must be positive");
92+
93+
return AbstractDataset.from(this, dataset -> Math.max(0, Math.min(dataset.length() - fromIndex, maxCount)),
94+
(dataset, index) -> dataset.get(fromIndex + Objects.checkIndex(index, maxCount)),
95+
dataset -> dataset.getOrder().dropTake(fromIndex, maxCount, dataset.stream(), dataset)
96+
);
97+
}
10198

102-
return new AbstractDataset.TransformedDataset<>(this) {
103-
@Override
104-
public int length() {
105-
return Math.max(0, Math.min(dataset.length() - fromIndex, maxCount));
106-
}
107-
108-
@Override
109-
public E get(int index) {
110-
return dataset.get(fromIndex + Objects.checkIndex(index, maxCount));
111-
}
112-
113-
@Override
114-
public Stream<E> stream() {
115-
return getOrder().take(maxCount, fromIndex, dataset.stream(), dataset);
116-
}
117-
};
99+
default Dataset<E> dropTakeExact(int fromIndex, int count) {
100+
if (length() < count - fromIndex)
101+
throw new IllegalArgumentException("The `dropTakeExact` end index cannot exceed dataset length " + length());
102+
103+
return dropTake(fromIndex, count).toImmutable();
118104
}
119105

120106
default <V> Dataset<V> map(Function<E, V> mapper) {
121107
Objects.requireNonNull(mapper);
122-
return new AbstractDataset.TransformedDataset<>(this) {
123-
@Override
124-
public V get(int index) {
125-
return mapper.apply(dataset.get(index));
126-
}
127-
128-
@Override
129-
public Stream<V> stream() {
130-
return dataset.stream().map(mapper);
131-
}
132-
};
108+
return AbstractDataset.from(this,
109+
(dataset, index) -> mapper.apply(dataset.get(index)),
110+
dataset -> dataset.stream().map(mapper));
111+
}
112+
113+
default DoubleDataset mapToDouble(ToDoubleFunction<E> mapper) {
114+
Objects.requireNonNull(mapper);
115+
return AbstractDoubleDataset.from(this,
116+
(dataset, index) -> mapper.applyAsDouble(dataset.get(index)),
117+
dataset -> dataset.stream().mapToDouble(mapper));
118+
}
119+
120+
default IntDataset mapToInt(ToIntFunction<E> mapper) {
121+
Objects.requireNonNull(mapper);
122+
return AbstractIntDataset.from(this,
123+
(dataset, index) -> mapper.applyAsInt(dataset.get(index)),
124+
dataset -> dataset.stream().mapToInt(mapper));
125+
}
126+
127+
default LongDataset mapToLong(ToLongFunction<E> mapper) {
128+
Objects.requireNonNull(mapper);
129+
return AbstractLongDataset.from(this,
130+
(dataset, index) -> mapper.applyAsLong(dataset.get(index)),
131+
dataset -> dataset.stream().mapToLong(mapper));
132+
}
133+
134+
default <R> Dataset<Pair<E, R>> withRight(Dataset<R> right) {
135+
Objects.requireNonNull(right, "right");
136+
return AbstractDataset.from(this, left -> Math.min(left.length(), right.length()),
137+
(left, index) -> Pair.of(left.get(index), right.get(index)));
138+
}
139+
140+
default Dataset<Pair<E, Double>> withRight(DoubleDataset right) {
141+
return withRight(right.boxed());
142+
}
143+
144+
default Dataset<Pair<E, Integer>> withRight(IntDataset right) {
145+
return withRight(right.boxed());
133146
}
134147

135148
default Dataset<Dataset<E>> subsequences(int len) {
136149
if (len <= 0)
137150
throw new IllegalArgumentException("subsequences length `" + len + "` must be positive");
138151

139-
return new AbstractDataset.TransformedDataset<>(this) {
140-
@Override
141-
public Dataset<E> get(int index) {
142-
return dataset.take(len, index);
143-
}
144-
145-
@Override
146-
public int length() {
147-
return Math.max(0, dataset.length() - len + 1);
148-
}
149-
150-
@Override
151-
public Stream<Dataset<E>> stream() {
152-
return IntStream.range(0, length()).mapToObj(index -> dataset.take(len, index));
153-
}
154-
};
152+
return AbstractDataset.from(this, dataset -> Math.max(0, dataset.length() - len + 1),
153+
(dataset, index) -> dataset.dropTake(index, len));
155154
}
156155

157156
@SuppressWarnings("unchecked")
@@ -176,94 +175,4 @@ public int size() {
176175
return dataset.length();
177176
}
178177
}
179-
180-
interface OfPrimitive<E,
181-
T_SEQ extends OfPrimitive<E, T_SEQ, T_SPLITR>,
182-
T_SPLITR extends Spliterator.OfPrimitive<E, ?, T_SPLITR>>
183-
extends SequenceAlike<E, T_SEQ> {
184-
185-
/**
186-
* Returns a primitive spliterator over the elements in the window.
187-
*
188-
* @return a primitive spliterator
189-
*/
190-
@Override
191-
T_SPLITR spliterator();
192-
193-
}
194-
195-
/**
196-
* An ordered, sliding window of primitive {@code int} values.
197-
*/
198-
interface OfInt extends OfPrimitive<Integer, OfInt, Spliterator.OfInt> {
199-
200-
/**
201-
* Returns the element at the specified position in the window.
202-
*
203-
* @param index the index of the element to return
204-
* @return the element at the specified position in the window
205-
* @throws IndexOutOfBoundsException if the index is out of range
206-
*/
207-
int get(int index);
208-
209-
/**
210-
* Returns a sequential {@code IntStream} with the specified number of elements in the window.
211-
*
212-
* @return a sequential {@code IntStream} over the elements in the window
213-
*/
214-
@Override
215-
default IntStream stream() {
216-
return StreamSupport.intStream(spliterator(), false);
217-
}
218-
}
219-
220-
/**
221-
* An ordered, sliding window of primitive {@code long} values.
222-
*/
223-
interface OfLong extends OfPrimitive<Long, OfLong, Spliterator.OfLong> {
224-
225-
/**
226-
* Returns the element at the specified position in the window.
227-
*
228-
* @param index the index of the element to return
229-
* @return the element at the specified position in the window
230-
* @throws IndexOutOfBoundsException if the index is out of range
231-
*/
232-
long get(int index);
233-
234-
/**
235-
* Returns a sequential {@code LongStream} with the specified number of elements in the window.
236-
*
237-
* @return a sequential {@code LongStream} over the elements in the window
238-
*/
239-
@Override
240-
default LongStream stream() {
241-
return StreamSupport.longStream(spliterator(), false);
242-
}
243-
}
244-
245-
/**
246-
* An ordered, sliding window of primitive {@code double} values.
247-
*/
248-
interface OfDouble extends OfPrimitive<Double, OfDouble, Spliterator.OfDouble> {
249-
250-
/**
251-
* Returns the element at the specified position in the window.
252-
*
253-
* @param index the index of the element to return
254-
* @return the element at the specified position in the window
255-
* @throws IndexOutOfBoundsException if the index is out of range
256-
*/
257-
double get(int index);
258-
259-
/**
260-
* Returns a sequential {@code DoubleStream} with the specified number of elements in the window.
261-
*
262-
* @return a sequential {@code DoubleStream} over the elements in the window
263-
*/
264-
@Override
265-
default DoubleStream stream() {
266-
return StreamSupport.doubleStream(spliterator(), false);
267-
}
268-
}
269178
}

0 commit comments

Comments
 (0)