Skip to content

Commit 3d0af46

Browse files
committed
A run is not complete until the BCL files appear, regardless of RTAComplete.txt
appearing. This may or may not resolve the pipeline failures we've had.
1 parent e48127a commit 3d0af46

File tree

6 files changed

+48
-5
lines changed

6 files changed

+48
-5
lines changed

RunStatus.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ def __init__( self , run_folder , opts = '' ):
3939
for r, l in sorted(self.runinfo_xml.read_and_length.items()):
4040
self.trigger_cycles.append(self.trigger_cycles[-1] + int(l))
4141

42+
# Correct the last one
43+
self.trigger_cycles[-1] -= 1
44+
4245
# We can do the well dups check after 70 cycles but we can only
4346
# check the indexes after the last index read is complete, so wait for that
4447
try:
@@ -68,7 +71,13 @@ def _is_sequencing_finished( self ):
6871
# however there were no runs where the RTAComplete.txt was not the last file written to the run folder.
6972
# So will only check for this file to determine if sequencing has finished or not
7073
RTACOMPLETE_LOCATION = os.path.join( self.run_path_folder , 'RTAComplete.txt' )
71-
return os.path.exists( RTACOMPLETE_LOCATION )
74+
75+
# Oh but it's no longer that simple. NovaSeq has started writing this file before all the CBCL
76+
# files are in place.
77+
if not os.path.exists( RTACOMPLETE_LOCATION ):
78+
return False
79+
80+
return self._is_read_finished(-1)
7281

7382
def _exists( self, glob_pattern ):
7483
""" Returns if a file exists and caches the result.
@@ -91,7 +100,7 @@ def _is_read_finished( self, readnum ):
91100
# readnum counts from 1 so trigger_cycles[readnum] is the first cycle of the
92101
# next read (trigger_cycles[0] is always 1)
93102
cycle = self.trigger_cycles[int(readnum)]
94-
return self._exists( "Data/Intensities/BaseCalls/L001/C{}.1/*".format(cycle) )
103+
return self._exists( f"Data/Intensities/BaseCalls/L001/C{cycle}.1/*" )
95104
except Exception:
96105
return False
97106

@@ -174,7 +183,7 @@ def get_machine_status( self ):
174183
return "complete"
175184
for n in range(len(self.trigger_cycles), 0 , -1):
176185
if self._is_read_finished(n):
177-
return "read{}_complete".format(n)
186+
return f"read{n}_complete"
178187
return "waiting_for_data"
179188

180189

doc/CopyComplete.txt

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
Soooo...
2+
3+
Up until now, one of the easiest things in the Illuminatus pipeline was deciding if
4+
a run was ready to demultiplex. If RTAComplete.txt is there, then we go.
5+
6+
But recently a couple of runs have failed, because a CBCL file was missing.
7+
8+
So it seems that waiting for RTAComplete.txt is not enough. In NovaSeq there is another
9+
file, CopyComplete.txt. But this comes in an hour after RTAComplete.txt, and it seems
10+
that we were only missing the CBCL file by a few minutes.
11+
12+
11:10 240722_A00291_0576_AHVHL5DMXY/RTAComplete.txt
13+
11:10 240722_A00291_0576_AHVHL5DMXY/Data/Intensities/BaseCalls/L002/C222.1/L002_2.cbcl
14+
12:43 240722_A00291_0576_AHVHL5DMXY/CopyComplete.txt
15+
16+
So, our choices are:
17+
18+
1) Wait for CopyComplete.txt and add 90 minutes to processing time.
19+
2) Add a short delay (5 mins?) after RTAComplete.txt appears (yuk!)
20+
3) Look explicitly for the CBCL file
21+
4) Something else??
22+
23+
I can't see a decent option 4. I think we need to do option 3. Fortunately we already have
24+
code for Read1 ready detection.

test/run_tests.sh

+2-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@ if [ -n "$VIRTUAL_ENV" ] ; then
1212
elif [ -e _illuminatus_venv ] ; then
1313
echo "Running: source ./_illuminatus_venv/bin/activate"
1414
source ./_illuminatus_venv/bin/activate
15-
if [ "$(which python3)" != "$(readlink -f _illuminatus_venv)/bin/python3" ] ; then
15+
if [ "$(readlink -f "$(dirname "$(which python3)")")/python3" != \
16+
"$(readlink -f _illuminatus_venv)/bin/python3" ] ; then
1617
echo "FAILED - python3 is $(which python3) not $(readlink -f _illuminatus_venv)/bin/python3"
1718
exit 1
1819
fi

test/seqdata_examples/160603_M01270_0196_000000000-AKGDE/Data/Intensities/BaseCalls/L001/C612.1/foo.bcl

Whitespace-only changes.

test/seqdata_examples/201125_A00291_0321_AHWHKYDRXX/Data/Intensities/BaseCalls/L001/C218.1/foo.cbcl

Whitespace-only changes.

test/test_run_status.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,8 @@ def test_redo_on_early_fail( self ):
113113
read1 processing at all.
114114
"""
115115
run_info = self.use_run('160726_K00166_0120_BHCVH2BBXX', copy=True)
116+
self.md('Data/Intensities/BaseCalls/L001/C318.1')
117+
self.touch('Data/Intensities/BaseCalls/L001/C318.1/foo.bcl')
116118

117119
# So the run is failed but still waiting for data
118120
self.touch('pipeline/failed')
@@ -176,6 +178,8 @@ def test_messy_redo( self ):
176178
self.md('pipeline/output/seqdata/pipeline')
177179
self.touch('pipeline/read1.done')
178180
self.touch('RTAComplete.txt')
181+
self.md('Data/Intensities/BaseCalls/L001/C318.1')
182+
self.touch('Data/Intensities/BaseCalls/L001/C318.1/foo.bcl')
179183

180184
def no_redo(status):
181185
""" Check the status AND also check
@@ -317,6 +321,9 @@ def yes_redo(status):
317321

318322
# Now we finish the reads
319323
self.touch('RTAComplete.txt')
324+
self.md('Data/Intensities/BaseCalls/L001/C318.1')
325+
self.touch('Data/Intensities/BaseCalls/L001/C318.1/foo.bcl')
326+
320327
no_redo('reads_finished')
321328
self.assertEqual(gy()['MachineStatus:'], 'complete')
322329

@@ -408,8 +415,10 @@ def yes_redo(status):
408415

409416
no_redo('read1_finished')
410417

411-
# Adding an RTAComplete.txt file should not change this status
418+
# Adding an RTAComplete.txt file and final BCL should not change this status
412419
self.touch('RTAComplete.txt')
420+
self.md('Data/Intensities/BaseCalls/L001/C68.1')
421+
self.touch('Data/Intensities/BaseCalls/L001/C68.1/foo.bcl')
413422
no_redo('read1_finished')
414423

415424
# Adding read1.started should push us to the state where ops will trigger in parallel

0 commit comments

Comments
 (0)