forked from thegenemyers/DEXTRACTOR
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathQV.h
73 lines (54 loc) · 3.16 KB
/
QV.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
/*******************************************************************************************
*
* Compressor/decompressor for .quiv files: customized Huffman codes for each stream based on
* the histogram of values occurring in a given file. The two low-complexity streams
* (deletionQV and substitutionQV) use a Huffman coding of the run length of the prevalent
* character.
*
* Author: Gene Myers
* Date: Jan 18, 2014
* Modified: July 25, 2014
*
********************************************************************************************/
#ifndef _QV_COMPRESSOR
#define _QV_COMPRESSOR

#include <stdio.h>   // FILE, used by the declarations below
// QVcoding: a PacBio compression scheme.  It bundles the Huffman schemes used for the
//   individual QV streams with the run-length settings and header information that go
//   with them.  The scheme pointers are opaque (void *) at this level.

typedef struct
{
  // Per-stream Huffman schemes
  void *delScheme;    //   deletion QVs
  void *insScheme;    //   insertion QVs
  void *mrgScheme;    //   merge QVs
  void *subScheme;    //   substitution QVs

  // Run-length Huffman schemes, relevant only when the matching ...Char field is > 0
  void *dRunScheme;   //   deletion run lengths (if delChar > 0)
  void *sRunScheme;   //   substitution run lengths (if subChar > 0)

  // Run-encoded values
  int   delChar;      //   if > 0, the run-encoded deletion value
  int   subChar;      //   if > 0, the run-encoded substitution value

  int   flip;         // Set when multi-byte integers need to be flipped
  char *prefix;       // Header line prefix
} QVcoding;
// Read_Lines reads the next nlines lines of input.  QVentry returns a pointer to the
//   first of those lines, for callers that need it.
//   NOTE(review): the meaning of Read_Lines' int result is not stated in this header --
//   confirm against the definition before relying on it.
int Read_Lines(FILE *input, int nlines);
// Declared with (void) so that this is a true prototype: an empty parameter list would
//   let calls with stray arguments compile unchecked.
char *QVentry(void);
// Read the .quiva file on input and record frequency statistics.  The statistics
//   accumulate across calls; Create_QVcoding later builds its encoding scheme from them,
//   so this must be called at least once before Create_QVcoding.
void QVcoding_Scan(FILE *input);
// Given QVcoding_Scan has been called at least once, create an encoding scheme based on
//   the accumulated statistics and return a pointer to it.  The returned encoding object
//   is *statically* allocated within the routine, so the pointer itself must not be freed
//   (use Free_QVcoding to release only its auxiliary storage).  If lossy is set then use
//   a lossy scaling for the insertion and merge streams.
//   NOTE(review): static allocation suggests a later call reuses the same object and so
//   overwrites an earlier result -- confirm against the definition.
QVcoding *Create_QVcoding(int lossy);
// Read/write a coding scheme from input / to output.  Read_QVcoding returns the scheme
//   it read; Write_QVcoding writes the scheme 'coding'.  The encoding object returned by
//   the reader is *statically* allocated within the routine, so the pointer itself must
//   not be freed.
QVcoding *Read_QVcoding(FILE *input);
void Write_QVcoding(FILE *output, QVcoding *coding);
// Free all the auxiliary storage associated with coding (but not the object itself!).
//   The objects handed out by Create_QVcoding and Read_QVcoding are statically allocated,
//   which is why the QVcoding record itself is left alone.
void Free_QVcoding(QVcoding *coding);
// Assuming the file pointer of input is positioned just beyond an entry header line,
//   read the next set of 5 QV lines, compress them according to 'coding', and write the
//   result to output.  If lossy is set then the scheme is a lossy one.
//   NOTE(review): presumably lossy must match the value given to Create_QVcoding when
//   'coding' was built -- confirm against the definition.
void Compress_Next_QVentry(FILE *input, FILE *output, QVcoding *coding, int lossy);
// Assuming the input is positioned just beyond the compressed encoding of an entry header,
//   read the set of compressed encodings for the ensuing 5 QV vectors, decompress them,
//   and place their decompressed values into entry, which is a 5 element array of character
//   pointers.  The parameter rlen, computed from the preceding header line, critically
//   provides the length of each of the 5 vectors.
//   NOTE(review): presumably each entry[i] must already point to storage for at least
//   rlen characters -- confirm against the definition.
void Uncompress_Next_QVentry(FILE *input, char **entry, QVcoding *coding, int rlen);
#endif // _QV_COMPRESSOR