Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/ncbi/tree-tool
Browse files Browse the repository at this point in the history
  • Loading branch information
Vyacheslav Brover committed Aug 1, 2024
2 parents 363489d + 498817d commit 8e8f316
Show file tree
Hide file tree
Showing 6 changed files with 160 additions and 7 deletions.
6 changes: 6 additions & 0 deletions dm/conversion/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ all: \
attr2_2phylip \
obj_attr2dm \
pairs2dm \
tsv2dm


attr2_2pairs.o: $(DM_HPP)
Expand Down Expand Up @@ -49,4 +50,9 @@ pairs2dm: $(pairs2dmOBJS)
$(CXX) -o $@ $(pairs2dmOBJS) $(LIBS)
$(ECHO)

tsv2dm.o: $(DM_HPP)
tsv2dmOBJS=tsv2dm.o $(DM_OBJ) $(TSV_DIR)/tsv.o
tsv2dm: $(tsv2dmOBJS)
$(CXX) -o $@ $(tsv2dmOBJS) $(LIBS)
$(ECHO)

136 changes: 136 additions & 0 deletions dm/conversion/tsv2dm.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
// tsv2dm.cpp

/*===========================================================================
*
* PUBLIC DOMAIN NOTICE
* National Center for Biotechnology Information
*
* This software/database is a "United States Government Work" under the
* terms of the United States Copyright Act. It was written as part of
* the author's official duties as a United States Government employee and
* thus cannot be copyrighted. This software/database is freely available
* to the public for use. The National Library of Medicine and the U.S.
* Government have not placed any restriction on its use or reproduction.
*
* Although all reasonable efforts have been taken to ensure the accuracy
* and reliability of the software and data, the NLM and the U.S.
* Government do not and cannot warrant the performance or results that
* may be obtained by using this software or data. The NLM and the U.S.
* Government disclaim all warranties, express or implied, including
* warranties of performance, merchantability or fitness for any particular
* purpose.
*
* Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author: Vyacheslav Brover
*
* File Description:
* Convert a .tsv-table into .dm-format
*
*/

#undef NDEBUG

#include "../../common.hpp"
#include "../../tsv/tsv.hpp"
using namespace Common_sp;
#include "../dataset.hpp"
using namespace DM_sp;
#include "../../version.inc"

#include "../../common.inc"



namespace
{



struct ThisApplication : Application
{
ThisApplication ()
: Application ("Print a .tsv-table in .dm-format")
{
version = VERSION;
addPositional ("in", "tsv-table");
addPositional ("col", "Object name column");
}



void body () const final
{
const string inFName = getArg ("in");
const string objColName = getArg ("col");


const TextTable tt (inFName);
tt. qc ();

const TextTable::ColNum objCol = tt. col2num (objColName);

Dataset ds;
FFOR (TextTable::RowNum, i, tt. rows. size ())
{
string name (tt. rows [i] [objCol]);
replace (name, ' ', '_');
ds. appendObj (name);
}
ds. setName2objNum ();

VectorPtr<Attr1> attrs; attrs. reserve (tt. header. size ());
FFOR (TextTable::ColNum, col, tt. header. size ())
{
Attr1* attr = nullptr;
if (col != objCol)
{
const TextTable::Header& h = tt. header [col];
string name (h. name);
replace (name, ' ', '_');
if (h. numeric)
if (h. scientific || h. decimals)
attr = new RealAttr1 (name, ds, h. decimals);
else
attr = new IntAttr1 (name, ds);
else
if (h. choices. size () <= TextTable::Header::choices_max)
attr = new NominAttr1 (name, ds);
}
attrs << attr;
}
ASSERT (attrs. size () == tt. header. size ());
ds. qc ();

FFOR (TextTable::RowNum, i, tt. rows. size ())
{
const StringVector& row = tt. rows [i];
//ASSERT (ds. getName2objNum (row [objCol]) == i); // ' ' -> '_'
FFOR (TextTable::ColNum, col, tt. header. size ())
if (const Attr1* attr = attrs [col])
if (! row [col]. empty ())
var_cast (attr) -> str2value (i, row [col]);
}

ds. qc ();
ds. saveText (cout);
}
};



} // namespace



// Entry point: runs ThisApplication on the command-line arguments and returns the result of run()
int main (int argc,
          const char* argv[])
{
  ThisApplication app;
  const int exitCode = app. run (argc, argv);
  return exitCode;
}



10 changes: 9 additions & 1 deletion dm/dataset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ void Obj::qc () const
Root::qc ();

QC_IMPLY (! name. empty (), goodName (name));
QC_ASSERT (! contains (name, ' '));

QC_ASSERT (mult >= 0.0);
QC_ASSERT (mult < inf);
}
Expand Down Expand Up @@ -132,6 +134,8 @@ void Attr::qc () const
Root::qc ();

QC_ASSERT (*dsIt == this);
QC_ASSERT (! contains (name, ' '));
QC_IMPLY (! name. empty (), goodName (name));
}


Expand Down Expand Up @@ -1241,7 +1245,7 @@ void CompactBoolAttr1::setAll (bool value)



// NominAttr1::Dependene
// NominAttr1::Dependence

void NominAttr1::Dependence::qc () const
{
Expand Down Expand Up @@ -1340,6 +1344,7 @@ void NominAttr1::summary (ostream & /*f*/) const
void NominAttr1::str2value (size_t objNum,
const string &s)
{
#if 0
size_t index = missing;
FFOR (size_t, i, categories. size ())
if (categories [i] == s)
Expand All @@ -1356,6 +1361,9 @@ void NominAttr1::str2value (size_t objNum,
}

values [objNum] = index;
#else
values [objNum] = category2index (s);
#endif
}


Expand Down
5 changes: 4 additions & 1 deletion dm/dataset.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ struct Obj : Root, DisjointCluster
// Object
{
string name;
// Me be empty()
// May be empty()
// No space
Real mult {1.0};
// >= 0.0
// Multiplicity (absolute frequency)
Expand Down Expand Up @@ -151,6 +152,7 @@ struct Attr : Root, Nocopy
public:
string name;
// May be empty()
// No space
bool rightAlign {false};
// true iff the value strings are to be right-aligned
/*
Expand Down Expand Up @@ -585,6 +587,7 @@ struct BoolAttr1 : NumAttr1
{
typedef ebool Value;
static const Value missing;
//array<bool,string> names; ??


protected:
Expand Down
2 changes: 1 addition & 1 deletion dm/linreg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ struct ThisApplication : Application

Dataset ds (inFName);
if (intercept)
ds. addRealAttr1Unit ();
ds. addRealAttr1Unit ("intercept");
ds. qc ();

const auto targetAttr = checkPtr (ds. name2attr (target)) -> asRealAttr1 ();
Expand Down
8 changes: 4 additions & 4 deletions phylogeny/data/featureTree/obj.featureTree
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@


# Objects: 297
# Interior nodes: 235
# Interior nodes: 241
# Common core features: 0
# Singleton features: 0
# Other features: 907
Genome size ave.: 487.404
Time: Used
Lambda_0 = 4.486e-01
Initial time = 7.420e-02
Lambda_0 = 4.567e-01
Initial time = 7.409e-02
timeOptimFrac = 1.000e+00
# Root core features = 90
Tree length min. = 0
Tree length = 73368
Tree length = 73180

0 comments on commit 8e8f316

Please sign in to comment.