Skip to content

Commit 4a581ba

Browse files
Brandon Lin authored and facebook-github-bot committed
Implement LengthsToOffsets operator in Caffe2 (pytorch#46590)
Summary: Pull Request resolved: pytorch#46590. This operator is very similar to LengthsToRanges, but it doesn't pack the offsets next to the original lengths. Reviewed By: yf225. Differential Revision: D24419746. fbshipit-source-id: aa8b014588bb22eced324853c545f8684086c4e4
1 parent 18d273d commit 4a581ba

File tree

3 files changed

+110
-14
lines changed

3 files changed

+110
-14
lines changed

caffe2/operators/utility_ops.cc

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ REGISTER_CPU_OPERATOR(GatherRanges, GatherRangesOp<CPUContext>);
5959
REGISTER_CPU_OPERATOR(LengthsGather, LengthsGatherOp<CPUContext>);
6060
REGISTER_CPU_OPERATOR(LengthsToSegmentIds, LengthsToSegmentIdsOp<CPUContext>);
6161
REGISTER_CPU_OPERATOR(LengthsToRanges, LengthsToRangesOp<CPUContext>);
62+
REGISTER_CPU_OPERATOR(LengthsToOffsets, LengthsToOffsetsOp<CPUContext>);
6263
REGISTER_CPU_OPERATOR(SegmentIdsToLengths, SegmentIdsToLengthsOp<CPUContext>);
6364
REGISTER_CPU_OPERATOR(SegmentIdsToRanges, SegmentIdsToRangesOp<CPUContext>);
6465
REGISTER_CPU_OPERATOR(LengthsToWeights, LengthsToWeightsOp<CPUContext>);
@@ -522,20 +523,20 @@ Another output LENGTHS represents each example length within OUTPUT
522523
"LENGTHS",
523524
"1-D tensor of size N with lengths over gathered data"
524525
" for each row in a batch. sum(LENGTHS) == OUTPUT.size()")
525-
.TensorInferenceFunction(OpSchema::NeedsAllInputShapes([](
526-
const OperatorDef& /* unused */, const vector<TensorShape>& in) {
527-
std::vector<TensorShape> out(2);
528-
529-
int total = 1;
530-
for (auto d : in[0].dims()) {
531-
total *= d;
532-
}
533-
out[0].add_dims(total);
534-
out[0].set_data_type(in[0].data_type());
535-
out[1].add_dims(in[1].dims(0));
536-
out[1].set_data_type(in[1].data_type());
537-
return out;
538-
}));
526+
.TensorInferenceFunction(OpSchema::NeedsAllInputShapes(
527+
[](const OperatorDef& /* unused */, const vector<TensorShape>& in) {
528+
std::vector<TensorShape> out(2);
529+
530+
int total = 1;
531+
for (auto d : in[0].dims()) {
532+
total *= d;
533+
}
534+
out[0].add_dims(total);
535+
out[0].set_data_type(in[0].data_type());
536+
out[1].add_dims(in[1].dims(0));
537+
out[1].set_data_type(in[1].data_type());
538+
return out;
539+
}));
539540

540541
OPERATOR_SCHEMA(LengthsGather)
541542
.NumInputs(3)
@@ -636,6 +637,30 @@ For example, `[1, 3, 0, 2]` transforms into `[[0, 1], [1, 3], [4, 0], [4, 2]]`.
636637
"ranges",
637638
"2D tensor of shape len(lengths) X 2 and the same type as `lengths`");
638639

640+
// Schema for LengthsToOffsets: one 1-D lengths input, one 1-D offsets output.
// The optional `include_last_offset` argument appends one extra element (the
// total of all lengths), so the output has either N or N + 1 entries.
OPERATOR_SCHEMA(LengthsToOffsets)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Given a vector of segment lengths, returns a vector of offsets from these lengths,
which will have the same size as the input vector, or one extra trailing element
(the sum of all lengths) when `include_last_offset` is set. Output is going to have
the same type as input. For long tensors explicit casting from int32 to int64
might be necessary prior to this op.

For example, `[1, 3, 0, 2]` transforms into `[0, 1, 4, 4]`, or into
`[0, 1, 4, 4, 6]` when `include_last_offset` is set.
)DOC")
    .Arg(
        "include_last_offset",
        "(bool, default false) If set, append the total of all lengths as a "
        "final offset, making the output one element longer than the input.")
    .Input(0, "lengths", "1D tensor of int32 or int64 segment lengths.")
    .Output(
        0,
        "offsets",
        "1D tensor of the same type as `lengths`, with the same size as "
        "`lengths` (plus one if `include_last_offset` is set)")
    .TensorInferenceFunction([](const OperatorDef& def,
                                const vector<TensorShape>& in) {
      const ArgumentHelper args(def);
      const bool include_last_offset =
          args.GetSingleArgument<bool>("include_last_offset", false);
      vector<int> out_shape(in[0].dims().begin(), in[0].dims().end());
      // The input shape may carry no dims (unknown or 0-d); indexing
      // out_shape[0] in that case would be an out-of-bounds write, so the
      // extra slot is only added when a leading dimension actually exists.
      if (include_last_offset && !out_shape.empty()) {
        out_shape[0] += 1;
      }
      return vector<TensorShape>{
          CreateTensorShape(out_shape, in[0].data_type())};
    });
663+
639664
OPERATOR_SCHEMA(SegmentIdsToLengths)
640665
.NumInputs(1, 2)
641666
.NumOutputs(1)

caffe2/operators/utility_ops.h

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -918,6 +918,45 @@ class LengthsToRangesOp : public Operator<Context> {
918918
}
919919
};
920920

921+
template <class Context>
922+
class LengthsToOffsetsOp : public Operator<Context> {
923+
public:
924+
USE_OPERATOR_CONTEXT_FUNCTIONS;
925+
926+
template <class... Args>
927+
explicit LengthsToOffsetsOp(Args&&... args)
928+
: Operator<Context>(std::forward<Args>(args)...),
929+
include_last_offset_(this->template GetSingleArgument<bool>(
930+
"include_last_offset",
931+
false)) {}
932+
933+
bool RunOnDevice() override {
934+
auto& input = Input(0);
935+
auto* output = Output(0);
936+
auto* input_data = input.template data<int32_t>();
937+
938+
CAFFE_ENFORCE(input.sizes().size() == 1, "Input must be a vector.");
939+
auto size = input.numel();
940+
941+
output->Resize(size + (include_last_offset_ ? 1 : 0));
942+
auto* output_data = output->template mutable_data<int32_t>();
943+
944+
int32_t offset = 0;
945+
for (int i = 0; i < size; ++i) {
946+
auto len = input_data[i];
947+
output_data[i] = offset;
948+
offset += len;
949+
}
950+
if (include_last_offset_) {
951+
output_data[size] = offset;
952+
}
953+
return true;
954+
}
955+
956+
private:
957+
bool include_last_offset_;
958+
};
959+
921960
template <class Context>
922961
class SegmentIdsToLengthsOp : public Operator<Context> {
923962
public:

caffe2/python/hypothesis_test.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -994,6 +994,38 @@ def op_ref(x):
994994
inputs=[np.array(lengths, dtype=np.int32)],
995995
reference=op_ref)
996996

997+
@given(
    lengths=st.lists(
        st.integers(min_value=0, max_value=10), min_size=0, max_size=10
    ),
    include_last_offset=st.booleans(),
    **hu.gcs_cpu_only
)
@settings(deadline=None)
def test_lengths_to_offsets(self, lengths, include_last_offset, gc, dc):
    """Check LengthsToOffsets against a NumPy prefix-sum reference.

    Covers empty and non-empty int32 length vectors, with and without the
    trailing total requested through `include_last_offset`.
    """
    op = core.CreateOperator(
        "LengthsToOffsets",
        ["lengths"],
        ["offsets"],
        include_last_offset=include_last_offset,
    )

    def op_ref(x):
        # [0, l0, l0 + l1, ..., sum(x)]: every start offset plus the total.
        # For empty input this is just [0], so no special case is needed.
        offsets = np.concatenate(([0], np.cumsum(x)))
        if not include_last_offset:
            # Drop the trailing total; only per-segment starts remain.
            offsets = offsets[:-1]
        # cumsum may widen int32 to the platform int; cast back so the
        # reference has the same dtype as the input.
        return (offsets.astype(x.dtype),)

    self.assertReferenceChecks(
        device_option=gc,
        op=op,
        inputs=[np.array(lengths, dtype=np.int32)],
        reference=op_ref,
    )
1028+
9971029
@given(prediction=hu.arrays(dims=[10, 3],
9981030
elements=hu.floats(allow_nan=False,
9991031
allow_infinity=False,

0 commit comments

Comments
 (0)