Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Option to handle missing values #39

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 36 additions & 11 deletions src/csv_module.F90
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ module csv_module

real(wp),parameter :: zero = 0.0_wp

character(len=0), parameter :: defmissing = ''

type,public :: csv_string
!! a cell from a CSV file.
!!
Expand Down Expand Up @@ -50,6 +52,7 @@ module csv_module
integer :: n_rows = 0 !! number of rows in the file
integer :: n_cols = 0 !! number of columns in the file
integer :: chunk_size = 1024 !! for expanding vectors
type(csv_string) :: missing !! missing value
type(csv_string),dimension(:),allocatable :: header !! the header
type(csv_string),dimension(:,:),allocatable :: csv_data !! the data in the file

Expand Down Expand Up @@ -133,6 +136,7 @@ subroutine initialize_csv_file(me,quote,delimiter,&
logical_true_string,&
logical_false_string,&
chunk_size,&
missing,&
verbose)

implicit none
Expand All @@ -158,6 +162,7 @@ subroutine initialize_csv_file(me,quote,delimiter,&
!! (default is `F`)
integer,intent(in),optional :: chunk_size !! factor for expanding vectors
!! (default is 100)
character(len=*),intent(in),optional :: missing !! string containing a missing code
logical,intent(in),optional :: verbose !! print error messages to the
!! console (default is False)

Expand All @@ -174,6 +179,9 @@ subroutine initialize_csv_file(me,quote,delimiter,&
if (present(verbose)) me%verbose = verbose
if (present(chunk_size)) me%chunk_size = chunk_size

me%missing%str = defmissing
if (present(missing)) me%missing%str = missing

! override:
if (me%enclose_all_in_quotes) me%enclose_strings_in_quotes = .true.

Expand All @@ -197,7 +205,7 @@ end subroutine destroy_csv_file
!>
! Read a CSV file.

subroutine read_csv_file(me,filename,header_row,skip_rows,status_ok)
subroutine read_csv_file(me,filename,header_row,skip_rows,missing,status_ok)

implicit none

Expand All @@ -206,6 +214,7 @@ subroutine read_csv_file(me,filename,header_row,skip_rows,status_ok)
logical,intent(out) :: status_ok !! status flag
integer,intent(in),optional :: header_row !! the header row
integer,dimension(:),intent(in),optional :: skip_rows !! rows to skip
character(len=*),intent(in),optional :: missing

type(csv_string),dimension(:),allocatable :: row_data !! a tokenized row
integer,dimension(:),allocatable :: rows_to_skip !! the actual rows to skip
Expand All @@ -227,6 +236,9 @@ subroutine read_csv_file(me,filename,header_row,skip_rows,status_ok)
call me%destroy()
arrays_allocated = .false.

me%missing%str = defmissing
if (present(missing)) me%missing%str = missing

open(newunit=iunit, file=filename, status='OLD', iostat=istat)

if (istat==0) then
Expand Down Expand Up @@ -1113,7 +1125,7 @@ subroutine tokenize_csv_line(me,line,cells)
character(len=:),allocatable :: tmp !! a temp string with whitespace removed
integer :: n !! length of compressed string

call split(line,me%delimiter,me%chunk_size,cells)
call split(line,me%delimiter,me%chunk_size,me%missing%str,cells)

! remove quotes if present:
do i = 1, size(cells)
Expand Down Expand Up @@ -1219,19 +1231,20 @@ end subroutine read_line_from_file
!````Fortran
! character(len=:),allocatable :: s
! type(csv_string),dimension(:),allocatable :: vals
! s = '1,2,3,4,5'
! call split(s,',',vals)
! s = '1,2,3,,5'
! call split(s,',','-999',vals)
!````
!
!@warning Does not account for tokens contained within quoted strings !!!

pure subroutine split(str,token,chunk_size,vals)
pure subroutine split(str,token,chunk_size,missing,vals)

implicit none

character(len=*),intent(in) :: str
character(len=*),intent(in) :: token
integer,intent(in) :: chunk_size !! for expanding vectors
character(len=*),intent(in) :: missing
type(csv_string),dimension(:),allocatable,intent(out) :: vals

integer :: i !! counter
Expand Down Expand Up @@ -1285,9 +1298,13 @@ pure subroutine split(str,token,chunk_size,vals)
i1 = 1
i2 = itokens(1)-1
if (i2>=i1) then
vals(1)%str = str(i1:i2)
if(len_trim(str(i1:i2)) == 0)then
vals(1)%str = missing !the first character is a token
else
vals(1)%str = str(i1:i2)
endif
else
vals(1)%str = '' !the first character is a token
vals(1)%str = missing !the first character is a token
end if

! 1 2 3
Expand All @@ -1297,18 +1314,26 @@ pure subroutine split(str,token,chunk_size,vals)
i1 = itokens(i-1)+len_token
i2 = itokens(i)-1
if (i2>=i1) then
vals(i)%str = str(i1:i2)
if(len_trim(str(i1:i2)) == 0)then
vals(i)%str =missing
else
vals(i)%str = str(i1:i2)
endif
else
vals(i)%str = '' !empty element (e.g., 'abc,,def')
vals(i)%str = missing !empty element (e.g., 'abc,,def')
end if
end do

i1 = itokens(n_tokens) + len_token
i2 = len_str
if (itokens(n_tokens)+len_token<=len_str) then
vals(n_tokens+1)%str = str(i1:i2)
if(len_trim(str(i1:i2)) == 0)then
vals(n_tokens+1)%str = missing
else
vals(n_tokens+1)%str = str(i1:i2)
endif
else
vals(n_tokens+1)%str = '' !the last character was a token
vals(n_tokens+1)%str = missing !the last character was a token
end if

else
Expand Down