Bump BioSequences/FASTX to v3/v2 (#16)
banhbio authored Mar 6, 2023
1 parent e7d198c commit ac218f6
Showing 9 changed files with 34 additions and 34 deletions.

.github/workflows/testing.yml (2 changes: 1 addition & 1 deletion)
@@ -12,7 +12,7 @@ jobs:
       fail-fast: false
       matrix:
         julia-version:
-          - '1.0' # LTS
+          - '1.6' # LTS
           - '1'
         julia-arch: [x86]
         os: [ubuntu-latest, windows-latest, macOS-latest] # TODO: Work on windows-latest

Project.toml (6 changes: 3 additions & 3 deletions)
@@ -8,9 +8,9 @@ BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
 FASTX = "c2308a5c-f048-11e8-3e8a-31650f418d12"
 
 [compat]
-BioSequences = "2"
-FASTX = "1.1"
-julia = "1"
+BioSequences = "3"
+FASTX = "2"
+julia = "1.6"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

src/ReadDatastores.jl (2 changes: 1 addition & 1 deletion)
@@ -241,7 +241,7 @@ data blob (i.e. `sizeof(BioSequences.encoded_data(seq))`).
 sequence type.
 """
 function _load_sequence_data!(ds::ReadDatastore{T}, seq::T) where {T<:LongSequence}
-    seqdata = BioSequences.encoded_data(seq)
+    seqdata = seq.data
     GC.@preserve seqdata unsafe_read(stream(ds), pointer(seqdata), sizeof(seqdata))
     return seq
 end
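
Note on the pattern above: BioSequences v3 removed the `encoded_data` accessor this package used, so the datastore now reaches the packed `UInt64` words through the sequence's `data` field. A minimal sketch of the layout this relies on, assuming the v3 representation of a `LongSequence` as one vector of packed 64-bit words (the length 100 is arbitrary):

    using BioSequences

    seq = LongSequence{DNAAlphabet{4}}(undef, 100)  # uninitialized 100-mer, 4 bits per symbol
    seqdata = seq.data                              # Vector{UInt64} of packed symbols
    # 100 symbols at 4 bits each pack into cld(100, 16) = 7 words.
    @assert length(seqdata) == 7
    @assert sizeof(seqdata) == 7 * sizeof(UInt64)   # the byte count unsafe_read fills above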

src/linked-reads.jl (24 changes: 12 additions & 12 deletions)
@@ -10,13 +10,13 @@ const LinkedTag = UInt32
 mutable struct LinkedReadData{A<:DNAAlphabet}
     seq1::LongSequence{A}
     seq2::LongSequence{A}
-    seqlen1::UInt64
-    seqlen2::UInt64
+    seqsize1::UInt64
+    seqsize2::UInt64
     tag::LinkedTag
 end
 
 Base.isless(a::LinkedReadData, b::LinkedReadData) = a.tag < b.tag
-LinkedReadData{A}(len) where {A<:DNAAlphabet} = LinkedReadData{A}(LongSequence{A}(len), LongSequence{A}(len), zero(UInt64), zero(UInt64), zero(LinkedTag))
+LinkedReadData{A}(len) where {A<:DNAAlphabet} = LinkedReadData{A}(LongSequence{A}(undef, len), LongSequence{A}(undef, len), zero(UInt64), zero(UInt64), zero(LinkedTag))
 
 const LinkedDS_Version = 0x0003

@@ -38,10 +38,10 @@ function _extract_tag_and_sequences!(current_data::LinkedReadData, fwrec::FASTQ.
         end
     end
     current_data.tag = newtag
-    current_data.seqlen1 = UInt64(min(max_read_len, FASTQ.seqlen(fwrec)))
-    current_data.seqlen2 = UInt64(min(max_read_len, FASTQ.seqlen(rvrec)))
-    copyto!(current_data.seq1, 1, fwrec, 1, current_data.seqlen1)
-    copyto!(current_data.seq2, 1, rvrec, 1, current_data.seqlen2)
+    current_data.seqsize1 = UInt64(min(max_read_len, FASTQ.seqsize(fwrec)))
+    current_data.seqsize2 = UInt64(min(max_read_len, FASTQ.seqsize(rvrec)))
+    copyto!(current_data.seq1, 1, fwrec, 1, current_data.seqsize1)
+    copyto!(current_data.seq2, 1, rvrec, 1, current_data.seqsize2)
 end
 
 struct LinkedReads{A<:DNAAlphabet} <: ShortReads{A}
@@ -121,7 +121,7 @@ function LinkedReads{A}(fwq::FASTQ.Reader, rvq::FASTQ.Reader, outfile::String, n
     fwrec = FASTQ.Record()
     rvrec = FASTQ.Record()
     chunk_data = [LinkedReadData{A}(max_read_len) for _ in 1:chunksize]
-    datachunksize = length(BioSequences.encoded_data(first(chunk_data).seq1))
+    datachunksize = length(first(chunk_data).seq1.data)
 
     while !eof(fwq) && !eof(rvq)
         # Read in `chunksize` read pairs.
@@ -150,10 +150,10 @@ function LinkedReads{A}(fwq::FASTQ.Reader, rvq::FASTQ.Reader, outfile::String, n
         for j in 1:chunkfill
             cd_j = chunk_data[j]
             write(chunk_fd, cd_j.tag)
-            write(chunk_fd, cd_j.seqlen1)
-            write(chunk_fd, BioSequences.encoded_data(cd_j.seq1))
-            write(chunk_fd, cd_j.seqlen2)
-            write(chunk_fd, BioSequences.encoded_data(cd_j.seq2))
+            write(chunk_fd, cd_j.seqsize1)
+            write(chunk_fd, cd_j.seq1.data)
+            write(chunk_fd, cd_j.seqsize2)
+            write(chunk_fd, cd_j.seq2.data)
         end
         close(chunk_fd)
         push!(chunk_files, string("sorted_chunk_", length(chunk_files), ".data"))
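
The `seqlen` to `seqsize` renames above track FASTX v2, which calls a record's sequence length `seqsize`. A small sketch of the pattern `_extract_tag_and_sequences!` follows, with a made-up in-memory record and `max_read_len` purely for illustration; it assumes FASTX v2 plus the five-argument `copyto!` already used in this file:

    using FASTX, BioSequences

    rec = first(FASTQ.Reader(IOBuffer("@r1\nACGTACGTACGT\n+\nIIIIIIIIIIII\n")))
    FASTQ.seqsize(rec)                 # 12; FASTX v1 spelled this FASTQ.seqlen

    max_read_len = 8                   # hypothetical datastore read-length cap
    buf = LongSequence{DNAAlphabet{4}}(undef, max_read_len)
    n = min(max_read_len, FASTQ.seqsize(rec))
    copyto!(buf, 1, rec, 1, n)         # copy at most max_read_len symbols into the buffer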

src/long-reads.jl (6 changes: 3 additions & 3 deletions)
@@ -94,7 +94,7 @@ function LongReads{A}(rdr::FASTQ.Reader, outfile::String, name::Union{String,Sym
     writestring(ofs, String(name))
 
     record = FASTQ.Record()
-    seq = LongSequence{A}(min_size)
+    seq = LongSequence{A}(undef, min_size)
 
     @info "Building long read datastore from FASTQ file"

@@ -109,7 +109,7 @@ function LongReads{A}(rdr::FASTQ.Reader, outfile::String, name::Union{String,Sym
             end
             rethrow()
         end
-        seq_len = FASTQ.seqlen(record)
+        seq_len = FASTQ.seqsize(record)
         if seq_len < min_size
             discarded = discarded + 1
             continue
@@ -186,6 +186,6 @@ end
 @inline function Base.getindex(lrds::LongReads, idx::Integer)
     @boundscheck checkbounds(lrds, idx)
     pos_size = _inbounds_index_of_sequence(lrds, idx)
-    seq = eltype(lrds)(pos_size.sequence_size)
+    seq = eltype(lrds)(undef, pos_size.sequence_size)
     return inbounds_load_sequence!(lrds, pos_size, seq)
 end
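
The constructor edits in this file are the other half of the BioSequences v3 migration: an uninitialized sequence of a given length is now requested explicitly with `undef`, where v2 accepted a bare length. A minimal sketch (lengths are arbitrary):

    using BioSequences

    seq = LongSequence{DNAAlphabet{4}}(undef, 150)  # BioSequences v2 allowed LongSequence{A}(150)
    @assert length(seq) == 150
    resize!(seq, 100)                               # the datastores later shrink to each read's actual size
    @assert length(seq) == 100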

src/paired-reads.jl (10 changes: 5 additions & 5 deletions)
Expand Up @@ -107,11 +107,11 @@ function PairedReads{A}(rdrx::FASTQ.Reader, rdry::FASTQ.Reader,
# Create and allocate the sequence and record objects.
lread = FASTQ.Record()
rread = FASTQ.Record()
lseq = LongSequence{A}(maxsize)
rseq = LongSequence{A}(maxsize)
lseq = LongSequence{A}(undef, maxsize)
rseq = LongSequence{A}(undef, maxsize)

#chunksize::UInt64 = BioSequences.seq_data_len(DNAAlphabet{4}, maxsize)
chunksize::UInt64 = length(BioSequences.encoded_data(lseq))
chunksize::UInt64 = length(lseq.data)
bps = UInt64(BioSequences.bits_per_symbol(A()))

fd = open(outfile * ".prseq", "w")
@@ -143,8 +143,8 @@ function PairedReads{A}(rdrx::FASTQ.Reader, rdry::FASTQ.Reader,
             rethrow()
         end
 
-        llen = UInt64(FASTQ.seqlen(lread))
-        rlen = UInt64(FASTQ.seqlen(rread))
+        llen = UInt64(FASTQ.seqsize(lread))
+        rlen = UInt64(FASTQ.seqsize(rread))
         # If either read is too short, discard them both.
         if llen < minsize || rlen < minsize
             discarded += 1
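
The `chunksize` line keeps its old meaning: the number of 64-bit words one encoded read occupies on disk, now taken from the length of the sequence's `data` vector rather than the old `encoded_data`/`seq_data_len` helpers. A sketch of how that word count depends on the alphabet, assuming BioSequences v3 packs symbols contiguously into `UInt64` words (`maxsize` is arbitrary):

    using BioSequences

    maxsize = 300
    lseq2 = LongSequence{DNAAlphabet{2}}(undef, maxsize)  # 2 bits/symbol, 32 symbols per word
    lseq4 = LongSequence{DNAAlphabet{4}}(undef, maxsize)  # 4 bits/symbol, 16 symbols per word
    @assert length(lseq2.data) == cld(maxsize, 32)        # 10 words
    @assert length(lseq4.data) == cld(maxsize, 16)        # 19 words
    @assert BioSequences.bits_per_symbol(DNAAlphabet{2}()) == 2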

src/sequence-buffer.jl (6 changes: 3 additions & 3 deletions)
@@ -41,7 +41,7 @@ end
 
 @inline function _load_sequence_data!(seq::LongSequence{A}, sb::DatastoreBuffer, offset::Integer) where {A<:DNAAlphabet}
     bufdata = buffer_array(sb)
-    seqdata = BioSequences.encoded_data(seq)
+    seqdata = seq.data
     GC.@preserve bufdata begin
         for i in eachindex(seqdata)
             seqdata[i] = unsafe_load(convert(Ptr{UInt64}, pointer(bufdata, offset + 1)))
@@ -74,7 +74,7 @@ end
     buffer_offset = file_offset - buffer_position(sb)
     sequence_length = unsafe_load(convert(Ptr{UInt64}, pointer(buffer_array(sb), buffer_offset + 1)))
     buffer_offset = buffer_offset + sizeof(UInt64)
-    seq = eltype(sb)(sequence_length)
+    seq = eltype(sb)(undef, sequence_length)
 
     return _load_sequence_data!(seq, sb, buffer_offset)
 end
@@ -125,7 +125,7 @@ end
     @boundscheck checkbounds(sb, idx)
     file_index = _inbounds_index_of_sequence(datastore(sb), idx)
     _check_for_buffer_refresh!(sb, file_index)
-    seq = eltype(sb)(file_index.sequence_size)
+    seq = eltype(sb)(undef, file_index.sequence_size)
     buffer_offset = file_index.offset - buffer_position(sb)
     return _load_sequence_data!(seq, sb, buffer_offset)
 end

src/short-reads.jl (6 changes: 3 additions & 3 deletions)
@@ -7,8 +7,8 @@ abstract type ShortReads{A<:DNAAlphabet} <: ReadDatastore{LongSequence{A}} end
 @inline function inbounds_load_sequence!(ds::ShortReads{A}, i::Integer, seq::LongSequence{A}) where {A<:DNAAlphabet}
     pos = _offset_of_sequence(ds, i)
     seek(stream(ds), pos)
-    seqlen = read(stream(ds), UInt64)
-    resize!(seq, seqlen)
+    seqsize = read(stream(ds), UInt64)
+    resize!(seq, seqsize)
     return _load_sequence_data!(ds, seq)
 end

@@ -19,7 +19,7 @@ end
 
 @inline function Base.getindex(sr::ShortReads{A}, idx::Integer) where {A<:DNAAlphabet}
     @boundscheck checkbounds(sr, idx)
-    seq = eltype(sr)(max_read_length(sr))
+    seq = eltype(sr)(undef, max_read_length(sr))
     return inbounds_load_sequence!(sr, idx, seq)
 end


test/long-reads.jl (6 changes: 3 additions & 3 deletions)
@@ -1,15 +1,15 @@
 @testset "Long read datastores" begin
-    function get_fastq_seqs(file)
+    function get_fastq_seqs(::Type{A}, file) where {A<:DNAAlphabet}
         seqs = map(open(FASTQ.Reader, file) do rdr
             collect(rdr)
         end) do rec
-            FASTQ.sequence(LongDNASeq, rec)
+            FASTQ.sequence(LongSequence{A}, rec)
         end
         return seqs
     end
 
     function check_round_trip(::Type{A}, FQ) where {A<:DNAAlphabet}
-        seqs = get_fastq_seqs(FQ)
+        seqs = get_fastq_seqs(A, FQ)
         fq = open(FASTQ.Reader, FQ)
         ds = LongReads{A}(fq, "human-nanopore", "human-nanopore", 0)
         ds2 = open(LongReads{A}, "human-nanopore.loseq")
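
The test helper now threads the alphabet through to `FASTX.sequence`, since BioSequences v3 dropped the `LongDNASeq` alias the old call relied on. A small sketch of the updated call against an in-memory record instead of the test FASTQ file:

    using FASTX, BioSequences

    rec = first(FASTQ.Reader(IOBuffer("@r1\nACGT\n+\nIIII\n")))
    s4 = FASTQ.sequence(LongSequence{DNAAlphabet{4}}, rec)  # replaces FASTQ.sequence(LongDNASeq, rec)
    s2 = FASTQ.sequence(LongSequence{DNAAlphabet{2}}, rec)  # the 2-bit alphabet also works
    @assert s4 == dna"ACGT"
    @assert length(s2) == 4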
