5 changes: 5 additions & 0 deletions .Rbuildignore
@@ -11,3 +11,8 @@
^revdep$
^cran-comments.md$
^pkgdown$
^.github$

node_modules$
package-lock\.json$
package\.json$
50 changes: 50 additions & 0 deletions .github/workflows/rworkflows.yml
@@ -0,0 +1,50 @@
name: rworkflows
'on':
  push:
    branches:
      - master
      - main
      - RELEASE_**
  pull_request:
    branches:
      - master
      - main
      - RELEASE_**
jobs:
  rworkflows:
    runs-on: ${{ matrix.config.os }}
    name: ${{ matrix.config.os }} (${{ matrix.config.r }})
    container: ${{ matrix.config.cont }}
    strategy:
      fail-fast: ${{ false }}
      matrix:
        config:
          - os: ubuntu-latest
            r: devel
            bioc: devel
            cont: bioconductor/bioconductor_docker:devel
            rspm: https://packagemanager.rstudio.com/cran/__linux__/focal/release
          - os: macOS-latest
            r: latest
            bioc: release
          - os: windows-latest
            r: latest
            bioc: release
    steps:
      - uses: neurogenomics/rworkflows@master
        with:
          run_bioccheck: ${{ false }}
          run_rcmdcheck: ${{ true }}
          as_cran: ${{ true }}
          run_vignettes: ${{ true }}
          has_testthat: ${{ true }}
          run_covr: ${{ true }}
          run_pkgdown: ${{ true }}
          has_runit: ${{ false }}
          GITHUB_TOKEN: ${{ secrets.PAT_GITHUB }}
          run_docker: ${{ true }}
          docker_user: bschilder
          docker_org: neurogenomicslab
          DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
          runner_os: ${{ runner.os }}
          cache_version: cache-master
18 changes: 0 additions & 18 deletions .travis.yml

This file was deleted.

10 changes: 6 additions & 4 deletions DESCRIPTION
@@ -1,8 +1,9 @@
Package: cranlogs
Title: Download Logs from the 'RStudio' 'CRAN' Mirror
Version: 2.1.1.9000
Version: 2.1.2
Authors@R: c(
person("Gábor", "Csárdi",, "[email protected]", role = c("aut", "cre")),
person(given="Gábor", family="Csárdi", email="[email protected]", role = c("aut", "cre")),
person(given="Brian", family="Schilder", email="[email protected]", role = c("ctb"), comment = c(ORCID = "0000-0001-5949-2191")),
person("R Consortium", role = c("fnd")))
Description: 'API' to the database of 'CRAN' package downloads from the
'RStudio' 'CRAN mirror'. The database itself is at <http://cranlogs.r-pkg.org>,
@@ -12,8 +13,9 @@ URL: https://github.com/r-hub/cranlogs, https://r-hub.github.io/cranlogs
BugReports: https://github.com/r-hub/cranlogs/issues
Imports:
httr,
jsonlite
jsonlite,
parallel
Encoding: UTF-8
RoxygenNote: 6.1.1
RoxygenNote: 7.2.3
Suggests:
testthat
1 change: 1 addition & 0 deletions NAMESPACE
@@ -7,3 +7,4 @@ importFrom(httr,GET)
importFrom(httr,content)
importFrom(httr,stop_for_status)
importFrom(jsonlite,fromJSON)
importFrom(parallel,mclapply)
13 changes: 12 additions & 1 deletion NEWS.md
@@ -1,5 +1,16 @@

# dev
# cranlogs 2.1.2

* Fix `cran_downloads` and prevent it from failing with many packages:
https://github.com/r-hub/cranlogs/issues/56
* Speed up `cran_downloads` with parallelisation.
* Add a new unit test for `cran_downloads` with a large number of packages.
* Remove `dontrun{}` from the `cran_downloads` and `cran_top_downloads` examples;
it is unclear why it was needed in the first place.
* New support functions: `message_parallel` and `split_batches`.
* Replace the failing Travis setup and the pkgdown workflow with `rworkflows`.
* Update Authors in *DESCRIPTION* to reflect contributions and make fields explicit.
* Add *node_modules$* to *.Rbuildignore*.

# cranlogs 2.1.1

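For context, a usage sketch of the new arguments described in the NEWS entry above (a hedged illustration, not taken from the PR: the package names are arbitrary stand-ins and the call needs network access to the cranlogs API):

```r
library(cranlogs)

## Imagine `pkgs` holding hundreds of package names; batch_size = 800 splits
## the query into chunks, and mc.cores = 2 runs chunks in parallel via
## forking (use mc.cores = 1 on Windows, where forking is unavailable).
pkgs <- c("ggplot2", "dplyr", "data.table", "shiny", "rmarkdown")
dl <- cran_downloads(packages = pkgs,
                     when = "last-week",
                     batch_size = 800,
                     mc.cores = 2)
head(dl)
```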
72 changes: 52 additions & 20 deletions R/cranlogs.R
@@ -19,6 +19,9 @@ top_url <- paste0(base_url, "top/")
#' \code{last-day}. It is ignored if \code{when} is given.
#' @param to End date, in \code{yyyy-mm-dd} format, or
#' \code{last-day}. It is ignored if \code{when} is given.
#' @param batch_size Maximum batch size per query.
#' A batch size of 800 or less is recommended, as larger batches can trigger HTTP 414 (Request-URI Too Long) errors.
#' @param mc.cores Number of cores to parallelise batched queries across.
#' @return For packages a data frame with columns:
#' \item{\code{package}}{The package. This column is missing if
#' all packages were queried.}
@@ -36,8 +39,8 @@ top_url <- paste0(base_url, "top/")
#' for that day.
#' @family CRAN downloads
#' @export
#' @examples
#' \dontrun{
#' @importFrom parallel mclapply
#' @examples
#' ## Default is last day for which data is available.
#' cran_downloads()
#'
@@ -55,16 +58,18 @@ top_url <- paste0(base_url, "top/")
#' cran_downloads(packages = c("ggplot2", "plyr", "dplyr"))
#'
#' ## R downloads
#' cran_downloads("R")
#' }

#' cran_downloads("R")
cran_downloads <- function(packages = NULL,
when = c("last-day", "last-week", "last-month"),
from = "last-day", to = "last-day") {
from = "last-day",
to = "last-day",
batch_size = 800,
mc.cores = 1) {

if (!missing(when)) {
interval <- match.arg(when)
} else {
when <- when[[1]] ### Take only first entry
if (as.character(from) != "last-day") {
check_date(from)
}
@@ -78,28 +83,57 @@ cran_downloads <- function(packages = NULL,
} else {
interval <- paste(from, sep = ":", to)
}
}
}
#### Check packages ####
if (!is.null(packages) &&
"R" %in% packages &&
any(packages != "R")) {
stop("R downloads cannot be mixed with package downloads")
}
#### Split into batches ####
t1 <- Sys.time()
batches <- split_batches(v = packages,
batch_size = batch_size)
df <- parallel::mclapply(seq_len(length(batches)), function(i){
b <- batches[[i]]
message_parallel(paste("Batch:",i,"/",length(batches)))
query(interval=interval,
packages=b)
}, mc.cores = mc.cores)
#### Bind all batch results ####
df <- do.call(rbind,df)
#### Report total time taken ####
message("Time elapsed: ", format(round(difftime(Sys.time(), t1), 1)))
return(df)
}

query <- function(interval,
packages){
#### Prepare URI ####
if (is.null(packages)) {
ppackages <- ""
} else {
if ("R" %in% packages && any(packages != "R")) {
stop("R downloads cannot be mixed with package downloads")
}
ppackages <- paste(packages, collapse = ",")
ppackages <- paste0("/", ppackages)
}

}
#### Request data ####
req <- GET(paste0(daily_url, interval, ppackages),
httr::user_agent("cranlogs R package by R-hub"))
stop_for_status(req)
#### Handle errors due to too many packages ####
status <- tryCatch({
stop_for_status(req)
}, error = function(e){e})
if (inherits(status, "error")) {
if (grepl("Request-URI Too Long (HTTP 414)", as.character(status), fixed = TRUE)) {
stop("Request-URI Too Long (HTTP 414): Try reducing `batch_size`.")
}
stop(status) # re-signal errors unrelated to URI length
}
#### Access content ####
r <- fromJSON(content(req, as = "text"), simplifyVector = FALSE)

#### Check content ####
if ("error" %in% names(r) && r$error == "Invalid query") {
stop("Invalid query, probably invalid dates")
}
#### Convert to data.frame ####
to_df(r, packages)

}

to_df <- function(res, packages) {
@@ -150,7 +184,7 @@ fill_in_dates <- function(df, start, end) {
rownames(df) <- NULL
}
df
}
}

#' Top downloaded packages from the RStudio CRAN mirror
#'
@@ -164,15 +198,13 @@ fill_in_dates <- function(df, start, end) {
#'
#' @family CRAN downloads
#' @export
#' @examples
#' \dontrun{
#' @examples
#' ## Default is last day for which data is available.
#' cran_top_downloads()
#'
#' ## Last week (6 days prior to the last day for which data is available)
#' ## instead
#' cran_top_downloads(when = "last-week")
#' }
#' cran_top_downloads(when = "last-week")
#'
#' @details \code{last-day} is the last day for which data is available,
#' \code{last-week} is from 6 days prior to that last day with data,
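As a rough illustration of the batching logic added above, a minimal, self-contained sketch (not part of the PR; `query()` is replaced by a trivial stand-in so it runs offline): the package vector is split into chunks of at most `batch_size`, each chunk is processed independently, and the per-chunk results are row-bound.

```r
library(parallel)

pkgs <- paste0("pkg", 1:8)   # stand-in for a long vector of package names
batch_size <- 3
batches <- split(pkgs, ceiling(seq_along(pkgs) / batch_size))

res <- mclapply(seq_along(batches), function(i) {
  ## In cran_downloads() this is where query(interval, packages = b) runs.
  data.frame(batch = i, package = batches[[i]])
}, mc.cores = 1)

do.call(rbind, res)  # one combined data frame, as cran_downloads() returns
```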
8 changes: 8 additions & 0 deletions R/message_parallel.r
@@ -0,0 +1,8 @@
#' Message parallel
#'
#' Send messages to the console even from within parallel processes.
#' @param ... Objects concatenated into a single message string.
#' @return The exit status of the shell call, used only for its side effect
#' of printing the message.
#' @keywords internal
message_parallel <- function(...) {
## message() from forked workers can be swallowed by some front-ends;
## echoing via the shell writes directly to the terminal.
system(sprintf('echo "%s"', paste0(..., collapse = "")))
}
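A note on the design choice (my reading, not stated in the PR): `message()` calls made inside forked `mclapply()` workers are often not shown by interactive front-ends, whereas shelling out to `echo` writes straight to the terminal. A minimal sketch of the same trick on a Unix-alike system:

```r
library(parallel)

## With mc.cores = 1 this runs serially, but the echo-based printing
## behaves the same way when workers are forked.
invisible(mclapply(1:2, function(i) {
  system(sprintf('echo "%s"', paste0("Batch: ", i, " / 2")))
  i
}, mc.cores = 1))
```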
5 changes: 5 additions & 0 deletions R/split_batches.R
@@ -0,0 +1,5 @@
#' Split a vector into batches
#'
#' Split a vector into a list of chunks of at most \code{batch_size} elements.
#' @keywords internal
split_batches <- function(v,
batch_size){
if(is.null(v)) return(list("1" = NULL))
split(v, ceiling(seq_along(v) / batch_size))
}
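To make the splitting rule concrete (illustrative only): `ceiling(seq_along(v) / batch_size)` maps positions 1, 2, ..., length(v) to group ids 1, 1, ..., 2, 2, ..., so `split()` returns a named list of chunks, each no longer than `batch_size`.

```r
v <- letters[1:7]
split(v, ceiling(seq_along(v) / 3))
#> $`1`
#> [1] "a" "b" "c"
#>
#> $`2`
#> [1] "d" "e" "f"
#>
#> $`3`
#> [1] "g"
```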
2 changes: 1 addition & 1 deletion README.Rmd
@@ -18,7 +18,7 @@ knitr::opts_chunk$set(
> Download logs from the RStudio CRAN mirror

<!-- badges: start -->
[![Linux Build Status](https://travis-ci.org/r-hub/cranlogs.svg?branch=master)](https://travis-ci.org/r-hub/cranlogs)
[![R build status](https://github.com/r-hub/cranlogs/workflows/rworkflows/badge.svg)](https://github.com/r-hub/cranlogs/actions)
[![Windows Build status](https://ci.appveyor.com/api/projects/status/github/metacran/cranlogs?svg=true)](https://ci.appveyor.com/project/gaborcsardi/cranlogs)
[![CRAN version](http://www.r-pkg.org/badges/version/cranlogs)](http://www.r-pkg.org/pkg/cranlogs)
[![CRAN RStudio mirror downloads](http://cranlogs.r-pkg.org/badges/cranlogs)](http://www.r-pkg.org/pkg/cranlogs)