Skip to content

Commit

Permalink
First commit
Browse files Browse the repository at this point in the history
  • Loading branch information
xapple committed Sep 27, 2011
0 parents commit 7fd4492
Show file tree
Hide file tree
Showing 132 changed files with 17,420 additions and 0 deletions.
22 changes: 22 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Temporary files #
.DS_Store
*.pyc
*.swp
*.swo
*~
\#*
.#*
._*

# IDE files #
/.idea

# Build files #
/MANIFEST

# Build directories #
/build/
/dist/

# Git files #
/.gitattributes
1 change: 1 addition & 0 deletions INFO.url
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
URL=http://bbcf.epfl.ch/track
674 changes: 674 additions & 0 deletions LICENSE.txt

Large diffs are not rendered by default.

49 changes: 49 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
track 1.0.0

Copyright 2011 EPFL BBCF <[email protected]>

track is a Python module for reading and writing genomic
data.

It was developed by the Bioinformatics and Biostatistics Core
Facility (BBCF) at the EPFL.

track is released under the GNU General Public License 3.0. A copy
of this license is in the LICENSE.txt file.

Installation
============

track requires:
* Python 2.6 or higher

To install you should download the latest source code from GitHub,
either by going to:

http://github.com/bbcf/track

and clicking on "Downloads", or by cloning the git repository with

$ git clone https://github.com/bbcf/track.git

Once you have the source code, run

$ python setup.py build
$ sudo python setup.py install

to install it. If you need to install it in a particular directory,
use

$ sudo python setup.py install --prefix=/prefix/path

Then the modules will go in /prefix/path/lib/pythonX.Y/site-packages,
where X.Y is the version of Python you run it with.

To run the test suite, in the distribution directory, run

$ nosetests

Full documentation
==================

The full documentation can be found [on our website](http://bbcf.epfl.ch/track).
69 changes: 69 additions & 0 deletions dev/sql_benchmark.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// gcc sql_benchmark.c -lsqlite3 -o c_readwrite
// time ./c_readwrite

#include <stdio.h>
#include <stdlib.h>
#include <sqlite3.h>

int main(void){
int r;
char* errmsg;
sqlite3* conW;
sqlite3* conR;
r = sqlite3_open("/scratch/sinclair/tmp/write_database.sql", &conW);
if(r) {printf("Can't open db: %s\n", sqlite3_errmsg(conW)); exit(1);}
r = sqlite3_open("/scratch/sinclair/tmp/read_database.sql", &conR);
if(r) {printf("Can't open db: %s\n", sqlite3_errmsg(conR)); exit(1);}
r = sqlite3_exec(conW, "BEGIN", 0, 0, &errmsg);
if(r!=SQLITE_OK) {printf("Can't start transaction: %s\n", errmsg); exit(1);}
char *tables[]={ "table0",
"table1",
"table2",
"table3",
"table4",
"table5",
"table6",
"table7",
"table8",
"table9"};
int n = 10;
int j = 0;
int i;
char queryR[1024];
char queryW[1024];
sqlite3_stmt* stmR;
sqlite3_stmt* stmW;
for(i=0; i<n; i++){
snprintf(queryW, sizeof(queryW), "%s%s%s", "CREATE table '", tables[i], "' (one text, two text, three integer)");
r = sqlite3_exec(conW, queryW, 0, 0, &errmsg);
if(r!=SQLITE_OK) {printf("Can't execute: %s\n", errmsg); exit(1);}

snprintf(queryR, sizeof(queryR), "%s%s%s", "SELECT * from '", tables[i], "'");
r = sqlite3_prepare_v2(conR, queryR, -1, &stmR, 0);
if(r!=SQLITE_OK) {printf("Can't prepare read: %s\n", sqlite3_errmsg(conR)); exit(1);}

snprintf(queryW, sizeof(queryW), "%s%s%s", "INSERT into '", tables[i], "' values (?,?,?)");
r = sqlite3_prepare_v2(conW, queryW, -1, &stmW, 0);
if(r!=SQLITE_OK) {printf("Can't prepare write: %s\n", sqlite3_errmsg(conW)); exit(1);}
while (1){
j++;
r = sqlite3_step(stmR);
if (r == SQLITE_DONE) {break;}
if (r != SQLITE_ROW) {printf("Can't step read statement (%d): %s\n", r, sqlite3_errmsg(conR)); exit(1);}

r = sqlite3_bind_text(stmW, 1, sqlite3_column_text(stmR, 0), 6, 0);
if(r!=SQLITE_OK) {printf("Row %d, can't bind first var of write statement (%d): %s\n", j, r, sqlite3_errmsg(conW)); exit(1);}
r = sqlite3_bind_text(stmW, 2, sqlite3_column_text(stmR, 1), 6, 0);
if(r!=SQLITE_OK) {printf("Row %d, can't bind second var of write statement (%d): %s\n", j, r, sqlite3_errmsg(conW)); exit(1);}
r = sqlite3_bind_int( stmW, 3, sqlite3_column_int( stmR, 2) +1);
if(r!=SQLITE_OK) {printf("Row %d, can't bind third var of write statement (%d): %s\n", j, r, sqlite3_errmsg(conW)); exit(1);}

r = sqlite3_step(stmW);
if(r!=SQLITE_DONE) {printf("Can't step on write statement (%d): %s\n", r, sqlite3_errmsg(conW)); exit(1);}
r = sqlite3_reset(stmW);
if(r!=SQLITE_OK) {printf("Can't reset the write statement (%d): %s\n", r, sqlite3_errmsg(conW)); exit(1);}}}
sqlite3_close(conR);
r = sqlite3_exec(conW, "COMMIT", 0, 0, &errmsg);
if(r!=SQLITE_OK) {printf("Can't commit transaction: %s\n", errmsg); exit(1);}
sqlite3_close(conW);
return 0;}
173 changes: 173 additions & 0 deletions dev/sql_benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
# Modules #
import sqlite3
from inspect import getfile, currentframe
from functools import partial
import os, timeit, random, tempfile

# Variables #
# Number of rows generated, written and read by every benchmark.
# (No ``global`` statement is needed at module level: it has no effect there.)
num_entries = 1000000

###############################################################################
# General Functions #
def setup_database(db_path, auto_commit=None):
    """Open the database at ``db_path`` and create the ``dummy`` table.

    The new connection and its cursor are stored in the module-level
    ``connection`` and ``cursor`` globals used by the benchmark functions.
    When ``auto_commit`` is truthy the connection is opened with
    ``isolation_level=None``; otherwise the sqlite3 default is kept.
    """
    global connection, cursor
    connect_options = {'isolation_level': None} if auto_commit else {}
    connection = sqlite3.connect(db_path, **connect_options)
    cursor = connection.cursor()
    cursor.execute('create table dummy (one text, two text, three integer)')
    connection.commit()

def destroy_database(db_path):
    """Delete the database file located at ``db_path``."""
    os.unlink(db_path)

def reopen_database(db_path):
    """Close the global cursor and connection, then open fresh ones.

    The module-level ``connection`` and ``cursor`` are replaced by a new
    connection to ``db_path`` and a cursor on it.
    """
    global connection, cursor
    # Cursor first, then the connection it belongs to.
    for handle in (cursor, connection):
        handle.close()
    connection = sqlite3.connect(db_path)
    cursor = connection.cursor()

def generate_data():
    """Yield ``num_entries`` random rows for the ``dummy`` table.

    Each row is a ``(name, name, number)`` tuple: two random names drawn
    from ``tempfile._RandomNameSequence`` and an integer between 1 and 1000
    inclusive. The row count is read from the module-level ``num_entries``.
    """
    # NOTE: _RandomNameSequence is a private helper of the tempfile module.
    name_gen = tempfile._RandomNameSequence()
    for _ in range(num_entries):
        # The next() builtin works on Python 2.6+ and Python 3 alike,
        # whereas the .next() method only exists on Python 2.
        yield (next(name_gen), next(name_gen), random.randint(1, 1000))

def time_the_execution(command):
    """Run ``command`` once under ``timeit`` and print the elapsed time.

    ``command`` is a zero-argument callable, normally a ``functools.partial``
    wrapping one of the benchmark functions. The printed line contains the
    total seconds, the name of the benchmarked function and the average
    number of microseconds per entry (based on the module-level
    ``num_entries``).
    """
    timeit_count = 1
    sec = timeit.Timer(command).timeit(timeit_count)
    # A partial exposes the wrapped function as .func; fall back to the
    # callable itself so plain functions can be timed as well.
    name = getattr(command, 'func', command).__name__
    string = "%.6f sec for " % sec + name
    # A single parenthesised argument prints identically on Python 2 and 3.
    print(string.ljust(40) + "(%.3f usec per entry)" % (1000000*sec / num_entries))

###############################################################################
# Write Functions #
def simple_execute(iterable):
    """Insert each row of ``iterable`` into ``dummy`` with its own execute().

    Uses the module-level ``cursor`` and ``connection``; afterwards the
    cursor is closed and the connection is committed and closed.
    """
    for row in iterable:
        cursor.execute('insert into dummy values (?,?,?)', row)
    cursor.close()
    connection.commit()
    connection.close()

def batch_execute(iterable):
    """Insert all rows of ``iterable`` into ``dummy`` with one executemany().

    Uses the module-level ``cursor`` and ``connection``; afterwards the
    cursor is closed and the connection is committed and closed.
    """
    statement = 'insert into dummy values (?,?,?)'
    cursor.executemany(statement, iterable)
    cursor.close()
    connection.commit()
    connection.close()

def simple_variable_table(iterable):
    """Insert rows one by one, naming the target table through a variable.

    Uses the module-level ``cursor`` and ``connection``; afterwards the
    cursor is closed and the connection is committed and closed.
    """
    table = 'dummy'
    for row in iterable:
        # The statement text is assembled again for every row; presumably
        # that per-row cost is part of what this variant measures.
        cursor.execute('insert into ' + table + ' values (?,?,?)', row)
    cursor.close()
    connection.commit()
    connection.close()

def batch_variable_table(iterable):
    """Insert all rows with one executemany(), table name held in a variable.

    Uses the module-level ``cursor`` and ``connection``; afterwards the
    cursor is closed and the connection is committed and closed.
    """
    table = 'dummy'
    statement = 'insert into ' + table + ' values (?,?,?)'
    cursor.executemany(statement, iterable)
    cursor.close()
    connection.commit()
    connection.close()

def variable_fields(iterable):
    """Insert all rows with one executemany(), building the placeholders.

    The number of ``?`` placeholders is derived from the length of the field
    list. Uses the module-level ``cursor`` and ``connection``; afterwards the
    cursor is closed and the connection is committed and closed.
    """
    fields = ['one','two','three']
    table = 'dummy'
    # One "?" per field, comma separated.
    placeholders = ','.join(['?'] * len(fields))
    cursor.executemany('insert into ' + table + ' values (' + placeholders + ')', iterable)
    cursor.close()
    connection.commit()
    connection.close()

def bad_bad_execute(iterable):
    """Insert rows by pasting their values straight into the SQL text.

    Don't do this: it is kept only as the anti-example of the benchmark.
    Uses the module-level ``cursor`` and ``connection``; afterwards the
    cursor is closed and the connection is committed and closed.
    """
    for row in iterable:
        statement = 'insert into dummy values ("' +row[0]+ '","' +row[1]+ '",' +str(row[2])+ ')'
        cursor.execute(statement)
    cursor.close()
    connection.commit()
    connection.close()

###############################################################################
# Read functions #
def read_one_by_one(command):
    """Execute ``command`` and consume the result with repeated fetchone().

    The third column of every row is added up so that each row is actually
    touched; the total is discarded. Uses the module-level ``cursor``.
    """
    cursor.execute(command)
    total = 0
    while True:
        row = cursor.fetchone()
        # fetchone() returns None once the result set is exhausted. Testing
        # for it directly, rather than catching TypeError, keeps a genuine
        # TypeError caused by bad data from silently ending the loop.
        if row is None:
            break
        total += row[2]

def read_all(command):
    """Execute ``command`` and consume the result with a single fetchall().

    The third column of every row is added up so that each row is actually
    touched; the total is discarded. Uses the module-level ``cursor``.
    """
    cursor.execute(command)
    total = 0
    # fetchall() already returns a list, so no extra list() copy is made.
    for row in cursor.fetchall():
        total += row[2]

def read_iterator(command):
    """Execute ``command`` and consume the result by iterating the cursor.

    The third column of every row is collected and summed; the total is
    discarded. Uses the module-level ``cursor``.
    """
    cursor.execute(command)
    third_column = [row[2] for row in cursor]
    sum(third_column)

###############################################################################
# Write #
if __name__=='__main__':
    # The scratch database lives next to this script.
    this_dir = os.path.dirname(os.path.abspath(getfile(currentframe()))) + '/'
    db_path = this_dir + "test_database.sql"

    # A database left behind by an aborted run would make the first
    # "create table" fail, so start from a clean slate.
    if os.path.exists(db_path):
        destroy_database(db_path)

    data = list(generate_data())

    ###########################################################################
    # Write #
    print("--write--")
    for write_benchmark in (simple_execute,
                            batch_execute,
                            simple_variable_table,
                            batch_variable_table,
                            variable_fields):
        setup_database(db_path)
        time_the_execution(partial(write_benchmark, data))
        destroy_database(db_path)

    # The database written by the last benchmark is kept on disk: the read
    # benchmarks below use its rows.
    setup_database(db_path)
    time_the_execution(partial(bad_bad_execute, data))

    ###########################################################################
    # Read #
    print("--read--")
    # The write benchmark closed the global handles; open fresh ones so that
    # reopen_database() has something valid to close.
    connection = sqlite3.connect(db_path)
    cursor = connection.cursor()
    command = "select * from dummy"

    for read_benchmark in (read_one_by_one, read_all, read_iterator):
        reopen_database(db_path)
        time_the_execution(partial(read_benchmark, command))

    ###########################################################################
    cursor.close()
    connection.close()
    destroy_database(db_path)
Binary file added dev/test_database.sql
Binary file not shown.
5 changes: 5 additions & 0 deletions doc/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Build directories #
/_*

# Bash scripts #
/update.sh
Loading

0 comments on commit 7fd4492

Please sign in to comment.