Skip to content

Commit 7d75d5a

Browse files
committed
Implement alignments using C API
1 parent b003a1f commit 7d75d5a

File tree

5 files changed

+279
-103
lines changed

5 files changed

+279
-103
lines changed

python/_tskitmodule.c

Lines changed: 97 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6079,6 +6079,99 @@ TreeSequence_get_individuals_nodes(TreeSequence *self)
60796079
return ret;
60806080
}
60816081

6082+
static PyObject *
6083+
TreeSequence_decode_alignments(TreeSequence *self, PyObject *args, PyObject *kwds)
6084+
{
6085+
int err;
6086+
PyObject *ret = NULL;
6087+
PyObject *py_ref, *py_nodes, *py_missing;
6088+
PyArrayObject *nodes_array = NULL;
6089+
const char *ref_seq;
6090+
Py_ssize_t ref_len, missing_len;
6091+
tsk_id_t *nodes;
6092+
tsk_size_t num_nodes;
6093+
double left, right;
6094+
char missing_char;
6095+
const char *missing_utf8;
6096+
int isolated_as_missing = 1;
6097+
tsk_flags_t options = 0;
6098+
PyObject *buf_obj = NULL;
6099+
char *buf = NULL;
6100+
6101+
static char *kwlist[] = { "reference_sequence", "nodes", "left", "right",
6102+
"missing_data_character", "isolated_as_missing", NULL };
6103+
6104+
if (TreeSequence_check_state(self) != 0) {
6105+
goto out;
6106+
}
6107+
6108+
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOddOp", kwlist, &py_ref, &py_nodes,
6109+
&left, &right, &py_missing, &isolated_as_missing)) {
6110+
goto out;
6111+
}
6112+
6113+
if (!PyBytes_Check(py_ref)) {
6114+
PyErr_SetString(PyExc_TypeError, "reference_sequence must be bytes");
6115+
goto out;
6116+
}
6117+
if (PyBytes_AsStringAndSize(py_ref, (char **) &ref_seq, &ref_len) < 0) {
6118+
goto out;
6119+
}
6120+
6121+
if (!PyUnicode_Check(py_missing)) {
6122+
PyErr_SetString(
6123+
PyExc_TypeError, "missing_data_character must be a (length 1) string");
6124+
goto out;
6125+
}
6126+
missing_utf8 = PyUnicode_AsUTF8AndSize(py_missing, &missing_len);
6127+
if (missing_utf8 == NULL) {
6128+
goto out;
6129+
}
6130+
if (missing_len != 1) {
6131+
PyErr_SetString(
6132+
PyExc_TypeError, "missing_data_character must be a single character");
6133+
goto out;
6134+
}
6135+
missing_char = missing_utf8[0];
6136+
6137+
if (!isolated_as_missing) {
6138+
options |= TSK_ISOLATED_NOT_MISSING;
6139+
}
6140+
6141+
nodes_array = (PyArrayObject *) PyArray_FROMANY(
6142+
py_nodes, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);
6143+
if (nodes_array == NULL) {
6144+
goto out;
6145+
}
6146+
num_nodes = (tsk_size_t) PyArray_DIM(nodes_array, 0);
6147+
nodes = PyArray_DATA(nodes_array);
6148+
6149+
Py_ssize_t total
6150+
= (Py_ssize_t)((size_t) num_nodes * (size_t)((tsk_size_t)(right - left)));
6151+
buf_obj = PyBytes_FromStringAndSize(NULL, total);
6152+
if (buf_obj == NULL) {
6153+
goto out;
6154+
}
6155+
buf = PyBytes_AS_STRING(buf_obj);
6156+
6157+
Py_BEGIN_ALLOW_THREADS err = tsk_treeseq_decode_alignments(self->tree_sequence,
6158+
ref_seq, (tsk_size_t) ref_len, nodes, num_nodes, left, right, missing_char, buf,
6159+
options);
6160+
Py_END_ALLOW_THREADS if (err != 0)
6161+
{
6162+
handle_library_error(err);
6163+
goto out;
6164+
}
6165+
6166+
ret = buf_obj;
6167+
buf_obj = NULL;
6168+
6169+
out:
6170+
Py_XDECREF(nodes_array);
6171+
Py_XDECREF(buf_obj);
6172+
return ret;
6173+
}
6174+
60826175
static PyObject *
60836176
TreeSequence_get_mutations_edge(TreeSequence *self)
60846177
{
@@ -8660,6 +8753,10 @@ static PyMethodDef TreeSequence_methods[] = {
86608753
.ml_meth = (PyCFunction) TreeSequence_get_individuals_nodes,
86618754
.ml_flags = METH_NOARGS,
86628755
.ml_doc = "Returns an array of the node ids for each individual" },
8756+
{ .ml_name = "decode_alignments",
8757+
.ml_meth = (PyCFunction) TreeSequence_decode_alignments,
8758+
.ml_flags = METH_VARARGS | METH_KEYWORDS,
8759+
.ml_doc = "Decode full alignments for given nodes and interval." },
86638760
{ .ml_name = "get_mutations_edge",
86648761
.ml_meth = (PyCFunction) TreeSequence_get_mutations_edge,
86658762
.ml_flags = METH_NOARGS,

0 commit comments

Comments
 (0)