@@ -28,7 +28,7 @@ use std::{
28
28
use arrow_array:: RecordBatch ;
29
29
use arrow_ipc:: writer:: StreamWriter ;
30
30
use arrow_schema:: Schema ;
31
- use chrono:: { NaiveDateTime , Timelike , Utc } ;
31
+ use chrono:: { DateTime , Datelike , NaiveDateTime , Timelike , Utc } ;
32
32
use derive_more:: { Deref , DerefMut } ;
33
33
use itertools:: Itertools ;
34
34
use parquet:: {
@@ -136,8 +136,7 @@ impl<'a> Stream<'a> {
136
136
hostname. push_str ( & INGESTOR_META . get_ingestor_id ( ) ) ;
137
137
}
138
138
let filename = format ! (
139
- "{}{stream_hash}.date={}.hour={:02}.minute={}.{}{hostname}.{ARROW_FILE_EXTENSION}" ,
140
- Utc :: now( ) . format( "%Y%m%dT%H%M" ) ,
139
+ "{stream_hash}.date={}.hour={:02}.minute={}.{}{hostname}.{ARROW_FILE_EXTENSION}" ,
141
140
parsed_timestamp. date( ) ,
142
141
parsed_timestamp. hour( ) ,
143
142
minute_to_slot( parsed_timestamp. minute( ) , OBJECT_STORE_DATA_GRANULARITY ) . unwrap( ) ,
@@ -155,13 +154,13 @@ impl<'a> Stream<'a> {
155
154
return vec ! [ ] ;
156
155
} ;
157
156
158
- let paths = dir
157
+ let paths: Vec < PathBuf > = dir
159
158
. flatten ( )
160
159
. map ( |file| file. path ( ) )
161
160
. filter ( |file| file. extension ( ) . is_some_and ( |ext| ext. eq ( "arrows" ) ) )
162
- . sorted_by_key ( |f| f. metadata ( ) . unwrap ( ) . modified ( ) . unwrap ( ) )
161
+ . sorted_by_key ( |f| f. metadata ( ) . unwrap ( ) . created ( ) . unwrap ( ) )
163
162
. collect ( ) ;
164
-
163
+
165
164
paths
166
165
}
167
166
@@ -172,24 +171,36 @@ impl<'a> Stream<'a> {
172
171
/// Only includes ones starting from the previous minute
173
172
pub fn arrow_files_grouped_exclude_time (
174
173
& self ,
175
- exclude : NaiveDateTime ,
176
174
shutdown_signal : bool ,
177
175
) -> HashMap < PathBuf , Vec < PathBuf > > {
176
+ let now = Utc :: now ( ) ;
177
+
178
+ // Extract date and time components of current time
179
+ let now_date = ( now. year ( ) , now. month ( ) , now. day ( ) ) ;
180
+ let now_time = ( now. hour ( ) , now. minute ( ) ) ;
181
+
178
182
let mut grouped_arrow_file: HashMap < PathBuf , Vec < PathBuf > > = HashMap :: new ( ) ;
179
183
let mut arrow_files = self . arrow_files ( ) ;
180
-
181
- // if the shutdown signal is false i.e. normal condition
182
- // don't keep the ones for the current minute
183
- if !shutdown_signal {
184
- arrow_files. retain ( |path| {
185
- !path
186
- . file_name ( )
187
- . unwrap ( )
188
- . to_str ( )
189
- . unwrap ( )
190
- . starts_with ( & exclude. format ( "%Y%m%dT%H%M" ) . to_string ( ) )
191
- } ) ;
192
- }
184
+ arrow_files = arrow_files
185
+ . into_iter ( )
186
+ . filter ( |path| {
187
+ let created_at = path. metadata ( ) . unwrap ( ) . created ( ) . unwrap ( ) ;
188
+ let created_at: DateTime < Utc > = created_at. into ( ) ;
189
+ let created_date = ( created_at. year ( ) , created_at. month ( ) , created_at. day ( ) ) ;
190
+ let created_time = ( created_at. hour ( ) , created_at. minute ( ) ) ;
191
+
192
+ let same_date = now_date == created_date;
193
+ let same_time = now_time == created_time;
194
+
195
+ // if the shutdown signal is false i.e. normal condition
196
+ // don't keep the ones for the current minute
197
+ if !shutdown_signal {
198
+ !same_date || !same_time
199
+ } else {
200
+ true
201
+ }
202
+ } )
203
+ . collect ( ) ;
193
204
194
205
let random_string =
195
206
rand:: distributions:: Alphanumeric . sample_string ( & mut rand:: thread_rng ( ) , 15 ) ;
@@ -306,8 +317,7 @@ impl<'a> Stream<'a> {
306
317
) -> Result < Option < Schema > , StagingError > {
307
318
let mut schemas = Vec :: new ( ) ;
308
319
309
- let time = chrono:: Utc :: now ( ) . naive_utc ( ) ;
310
- let staging_files = self . arrow_files_grouped_exclude_time ( time, shutdown_signal) ;
320
+ let staging_files = self . arrow_files_grouped_exclude_time ( shutdown_signal) ;
311
321
if staging_files. is_empty ( ) {
312
322
metrics:: STAGING_FILES
313
323
. with_label_values ( & [ & self . stream_name ] )
0 commit comments