Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ export(frollsum)
export(frollmax)
export(frollmin)
export(frollprod)
export(frollmedian)
export(frollvar)
export(frollsd)
export(frollapply)
export(frolladapt)
export(nafill)
Expand Down
49 changes: 48 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,52 @@
#9: 2025-09-22 9 8 9.0
```

19. New rolling functions, `frollmin` and `frollprod`, have been implemented, towards [#2778](https://github.com/Rdatatable/data.table/issues/2778). Thanks to @jangorecki for implementation.
19. Other new rolling functions, `frollmin`, `frollprod`, `frollmedian`, `frollvar` and `frollsd`, have been implemented, resolving the long-standing issue [#2778](https://github.com/Rdatatable/data.table/issues/2778). Thanks to @jangorecki for the implementation. The implementation of rolling median is based on a novel algorithm, "sort-median", described by [@suomela](https://github.com/suomela) in his 2014 paper [Median Filtering is Equivalent to Sorting](https://arxiv.org/abs/1406.1717). "sort-median" scales very well, not only with the size of the input vector but also with the size of the rolling window.
```r
rollmedian = function(x, n) {
  # Naive baseline: right-aligned rolling median computed window by window in plain R.
  # Positions that do not yet have a full window of n observations stay NA.
  nx = length(x)
  ans = rep(NA_real_, nx)
  if (n > nx) return(ans) # window longer than the input: nothing to compute
  for (end in n:nx) {
    start = end-n+1L
    ans[end] = median(x[start:end])
  }
  ans
}
library(data.table)
setDTthreads(8)
set.seed(108)
x = rnorm(1e5)

n = 100
system.time(rollmedian(x, n))
# user system elapsed
# 2.049 0.001 2.051
system.time(frollapply(x, n, median, simplify=unlist))
# user system elapsed
# 3.071 0.223 0.436
system.time(frollmedian(x, n))
# user system elapsed
# 0.013 0.000 0.004

n = 1000
system.time(rollmedian(x, n))
# user system elapsed
# 3.496 0.009 3.507
system.time(frollapply(x, n, median, simplify=unlist))
# user system elapsed
# 4.552 0.307 0.632
system.time(frollmedian(x, n))
# user system elapsed
# 0.015 0.000 0.004

n = 10000
system.time(rollmedian(x, n))
# user system elapsed
# 16.350 0.025 16.382
system.time(frollapply(x, n, median, simplify=unlist))
# user system elapsed
# 14.865 0.722 2.267
system.time(frollmedian(x, n))
# user system elapsed
# 0.028 0.000 0.005
```
20. `fread()` now supports the `comment.char` argument to skip trailing comments or comment-only lines, consistent with `read.table()`, [#856](https://github.com/Rdatatable/data.table/issues/856). The default remains `comment.char = ""` (no comment parsing) for backward compatibility and performance, in contrast to `read.table(comment.char = "#")`. Thanks to @arunsrinivasan and many others for the suggestion and @ben-schwen for the implementation.

### BUG FIXES

Expand Down Expand Up @@ -288,6 +333,8 @@

19. Ellipsis elements like `..1` are correctly excluded when searching for variables in "up-a-level" syntax inside `[`, [#5460](https://github.com/Rdatatable/data.table/issues/5460). Thanks @ggrothendieck for the report and @MichaelChirico for the fix.

20. BREAKING CHANGE: `week()` now calculates the week of the year sequentially (days 1-7 are week 1), fixing a bug where the first week could have 6 days. A one-time warning is now issued if this change affects the output for a given input, which can be disabled via `options(datatable.warn.week.change = FALSE)`. [#2611](https://github.com/Rdatatable/data.table/issues/2611). Thanks to @MichaelChirico for the report and @venom1204 for the fix.

### NOTES

1. The following in-progress deprecations have proceeded:
Expand Down
7 changes: 5 additions & 2 deletions R/fread.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ fread = function(
input="", file=NULL, text=NULL, cmd=NULL, sep="auto", sep2="auto", dec="auto", quote="\"", nrows=Inf, header="auto",
na.strings=getOption("datatable.na.strings","NA"), stringsAsFactors=FALSE, verbose=getOption("datatable.verbose",FALSE),
skip="__auto__", select=NULL, drop=NULL, colClasses=NULL, integer64=getOption("datatable.integer64","integer64"),
col.names, check.names=FALSE, encoding="unknown", strip.white=TRUE, fill=FALSE, blank.lines.skip=FALSE, key=NULL, index=NULL,
col.names, check.names=FALSE, encoding="unknown", strip.white=TRUE, fill=FALSE, blank.lines.skip=FALSE, comment.char="", key=NULL, index=NULL,
showProgress=getOption("datatable.showProgress",interactive()), data.table=getOption("datatable.fread.datatable",TRUE),
nThread=getDTthreads(verbose), logical01=getOption("datatable.logical01",FALSE),
logicalYN=getOption("datatable.logicalYN", FALSE),
Expand Down Expand Up @@ -30,6 +30,9 @@ yaml=FALSE, tmpdir=tempdir(), tz="UTC")
isTRUEorFALSE(stringsAsFactors) || (is.double(stringsAsFactors) && length(stringsAsFactors)==1L && 0.0<=stringsAsFactors && stringsAsFactors<=1.0),
is.numeric(nrows), length(nrows)==1L
)
if (!is.character(comment.char) || length(comment.char) != 1L || is.na(comment.char) || nchar(comment.char) > 1L) {
stopf("comment.char= must be a single non-NA character.")
}
fill = if(identical(fill, Inf)) .Machine$integer.max else as.integer(fill)
nrows=as.double(nrows) #4686
if (is.na(nrows) || nrows<0L) nrows=Inf # accept -1 to mean Inf, as read.table does
Expand Down Expand Up @@ -289,7 +292,7 @@ yaml=FALSE, tmpdir=tempdir(), tz="UTC")
if (identical(tt,"") || is_utc(tt)) # empty TZ env variable ("") means UTC in C library, unlike R; _unset_ TZ means local
tz="UTC"
}
ans = .Call(CfreadR,input,identical(input,file),sep,dec,quote,header,nrows,skip,na.strings,strip.white,blank.lines.skip,
ans = .Call(CfreadR,input,identical(input,file),sep,dec,quote,header,nrows,skip,na.strings,strip.white,blank.lines.skip,comment.char,
fill,showProgress,nThread,verbose,warnings2errors,logical01,logicalYN,select,drop,colClasses,integer64,encoding,keepLeadingZeros,tz=="UTC")
if (!length(ans)) return(null.data.table()) # test 1743.308 drops all columns
nr = length(ans[[1L]])
Expand Down
9 changes: 9 additions & 0 deletions R/froll.R
Original file line number Diff line number Diff line change
Expand Up @@ -213,3 +213,12 @@ frollmin = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left
frollprod = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) {
  # Thin wrapper: delegates to froll() with fun="prod", passing every argument through unchanged.
  # hasNA has no default here, so when the caller omits it it reaches froll() as a missing argument.
  froll(
    fun="prod",
    x=x, n=n, fill=fill, algo=algo, align=align,
    na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial,
    give.names=give.names, hasNA=hasNA
  )
}
frollmedian = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) {
  # Thin wrapper: delegates to froll() with fun="median", passing every argument through unchanged.
  # hasNA has no default here, so when the caller omits it it reaches froll() as a missing argument.
  froll(
    fun="median",
    x=x, n=n, fill=fill, algo=algo, align=align,
    na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial,
    give.names=give.names, hasNA=hasNA
  )
}
frollvar = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) {
  # Thin wrapper: delegates to froll() with fun="var", passing every argument through unchanged.
  # hasNA has no default here, so when the caller omits it it reaches froll() as a missing argument.
  froll(
    fun="var",
    x=x, n=n, fill=fill, algo=algo, align=align,
    na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial,
    give.names=give.names, hasNA=hasNA
  )
}
frollsd = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) {
  # Thin wrapper: delegates to froll() with fun="sd", passing every argument through unchanged.
  # hasNA has no default here, so when the caller omits it it reaches froll() as a missing argument.
  froll(
    fun="sd",
    x=x, n=n, fill=fill, algo=algo, align=align,
    na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial,
    give.names=give.names, hasNA=hasNA
  )
}
Loading
Loading