Skip to content

Commit

Permalink
Do not repackage source archive
Browse files Browse the repository at this point in the history
For external packages, we currently extract the source archive, only to
repackage it again using `R CMD build`. This results in slightly slower
builds, and could lead to reproducibility problems when the unecessary
step is not reproducibile.
  • Loading branch information
Siddhartha Bagaria committed Mar 8, 2022
1 parent 3723cbb commit dfee2f7
Show file tree
Hide file tree
Showing 6 changed files with 266 additions and 60 deletions.
4 changes: 3 additions & 1 deletion R/defs.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,14 @@ sandbox and generate a script to run the executable.
r_test is similar to r_binary, but acts as a test.
"""

load("@com_grail_rules_r//R/internal:build.bzl", _r_binary_pkg = "r_binary_pkg", _r_pkg = "r_pkg")
load("@com_grail_rules_r//R/internal:build.bzl", _r_binary_pkg = "r_binary_pkg", _r_pkg = "r_pkg", _r_source_pkg = "r_source_pkg")
load("@com_grail_rules_r//R/internal:library.bzl", _r_library = "r_library", _r_library_tar = "r_library_tar")
load("@com_grail_rules_r//R/internal:tests.bzl", _r_pkg_test = "r_pkg_test", _r_unit_test = "r_unit_test")
load("@com_grail_rules_r//R/internal:binary.bzl", _r_binary = "r_binary", _r_markdown = "r_markdown", _r_test = "r_test")
load("@com_grail_rules_r//R/internal/toolchains:toolchain.bzl", _r_toolchain = "r_toolchain")

r_source_pkg = _r_source_pkg

r_binary_pkg = _r_binary_pkg

r_pkg = _r_pkg
Expand Down
228 changes: 190 additions & 38 deletions R/internal/build.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -534,8 +534,8 @@ def _build_impl(ctx):
external_repo = external_repo,
makevars = ctx.file.makevars,
pkg_deps = pkg_deps,
pkg_lib_dir = pkg_lib_dir,
pkg_gcno_files = pkg_gcno_files,
pkg_lib_dir = pkg_lib_dir,
pkg_name = pkg_name,
src_archive = pkg_src_archive,
src_files = ctx.files.srcs,
Expand All @@ -546,6 +546,129 @@ def _build_impl(ctx):
instrumented_files_info,
]

def _build_source_pkg_impl(ctx):
info = ctx.toolchains["@com_grail_rules_r//R:toolchain_type"].RInfo

pkg_name = _package_name(ctx)
pkg_src_dir = _package_dir(ctx)
pkg_lib_dir = ctx.actions.declare_directory("lib")
pkg_bin_archive = ctx.outputs.bin_archive
flock = ctx.attr._flock.files_to_run.executable

# Instrumenting external R packages can be troublesome; e.g. RProtoBuf and testthat.
external_repo = _external_repo(ctx)
instrumented = (ctx.coverage_instrumented() and
not external_repo and
not "no-instrument" in ctx.attr.tags)

pkg_deps = _flatten_pkg_deps_list(ctx.attr.deps)

library_deps = _library_deps(pkg_deps)
cc_deps = _cc_deps(ctx, instrumented)
transitive_tools = depset(
_executables(ctx.attr.tools),
transitive = [library_deps.transitive_tools],
)
build_tools = depset(
_executables(ctx.attr.build_tools + info.tools),
transitive = [transitive_tools],
)
instrument_files = [ctx.file._instrument_R] if instrumented else []

input_files = ([ctx.file.src, ctx.file._build_pkg_common_sh, ctx.file._build_pkg_bin_sh] +
library_deps.lib_dirs + cc_deps.files +
_makevars_files(info.makevars_site, ctx.file.makevars) +
build_tools.to_list() +
instrument_files +
info.files + [info.state])

if ctx.file.config_override:
input_files.append(ctx.file.config_override)

output_files = [pkg_lib_dir, pkg_bin_archive]
install_args = list(ctx.attr.install_args)
if instrumented:
# We need to keep the sources for code coverage to work.
# NOTE: With these options, each installed object in package namespaces gets a
# srcref attribute that has the source filenames as when installing the package.
# For reproducible builds, these will be /tmp paths.
install_args.extend(["--with-keep.source"])

# We currently do not instrument native code in source archives.

env = {
"DIRECT_FROM_SOURCE": "true",
"PKG_SRC_DIR": pkg_src_dir,
"PKG_LIB_PATH": pkg_lib_dir.path,
"PKG_SRC_ARCHIVE": ctx.file.src.path,
"PKG_BIN_ARCHIVE": pkg_bin_archive.path,
"PKG_NAME": pkg_name,
"R_MAKEVARS_SITE": info.makevars_site.path if info.makevars_site else "",
"R_MAKEVARS_USER": ctx.file.makevars.path if ctx.file.makevars else "",
"C_LIBS_FLAGS": " ".join(cc_deps.c_libs_flags),
"C_CPP_FLAGS": " ".join(cc_deps.c_cpp_flags),
"C_SO_FILES": _sh_quote_args([f.path for f in cc_deps.c_so_files]),
"CONFIG_OVERRIDE": ctx.file.config_override.path if ctx.file.config_override else "",
"R_LIBS_DEPS": ":".join(["_EXEC_ROOT_" + d.path for d in library_deps.lib_dirs]),
"EXPORT_ENV_VARS_CMD": "; ".join(_env_vars(info.env_vars) + _env_vars(ctx.attr.env_vars)),
"BUILD_TOOLS_EXPORT_CMD": _build_path_export(build_tools),
"INSTALL_ARGS": _sh_quote_args(install_args),
"INSTRUMENT_SCRIPT": ctx.file._instrument_R.path,
"FLOCK_PATH": flock.path,
"INSTRUMENTED": "true" if instrumented else "false",
"BAZEL_R_DEBUG": "true" if "rlang-debug" in ctx.features else "false",
"BAZEL_R_VERBOSE": "true" if "rlang-verbose" in ctx.features else "false",
"R": " ".join(info.r),
"RSCRIPT": " ".join(info.rscript),
"REQUIRED_VERSION": info.version,
}
ctx.actions.run(
outputs = output_files,
inputs = input_files,
tools = [flock],
executable = ctx.executable._build_pkg_bin_sh,
env = env,
mnemonic = "RBuild",
use_default_shell_env = False,
progress_message = "Building R package %s" % pkg_name,
)

ctx.actions.symlink(output = ctx.outputs.src_archive, target_file = ctx.file.src)

_symlink_so_lib(ctx, pkg_name, pkg_lib_dir)

instrumented_files_info = coverage_common.instrumented_files_info(
ctx,
# List the dependencies in which transitive instrumented files can be found.
dependency_attributes = ["deps", "cc_deps"],
# We build instrumented packages with --keep.source, so we don't
# need the .R files.
)

return [
DefaultInfo(
files = depset([pkg_lib_dir]),
runfiles = ctx.runfiles([pkg_lib_dir], collect_default = True),
),
RPackage(
bin_archive = pkg_bin_archive,
build_tools = build_tools,
cc_deps = cc_deps,
external_repo = external_repo,
makevars = ctx.file.makevars,
pkg_deps = pkg_deps,
pkg_gcno_files = None,
pkg_lib_dir = pkg_lib_dir,
pkg_name = pkg_name,
src_archive = ctx.file.src,
src_files = None,
test_files = None,
transitive_pkg_deps = library_deps.transitive_pkg_deps,
transitive_tools = transitive_tools,
),
instrumented_files_info,
]

def _build_binary_pkg_impl(ctx):
info = ctx.toolchains["@com_grail_rules_r//R:toolchain_type"].RInfo

Expand Down Expand Up @@ -631,28 +754,58 @@ _COMMON_ATTRS = {
),
}

_PKG_ATTRS = dict(_COMMON_ATTRS)
# Attributes for both `R CMD build` and `R CMD INSTALL`.
_COMMON_BUILD_ATTRS = {
"cc_deps": attr.label_list(
doc = "cc_library dependencies for this package",
),
"config_override": attr.label(
allow_single_file = True,
doc = "Replace the package configure script with this file",
),
"makevars": attr.label(
allow_single_file = True,
doc = "Additional Makevars file supplied as R_MAKEVARS_USER",
),
"build_tools": attr.label_list(
allow_files = True,
doc = "Executables that package build and load will try to find in the system",
),
"_build_pkg_common_sh": attr.label(
allow_single_file = True,
default = "@com_grail_rules_r//R/scripts:build_pkg_common.sh",
),
"_build_pkg_bin_sh": attr.label(
allow_single_file = True,
default = "@com_grail_rules_r//R/scripts:build_pkg_bin.sh",
executable = True,
cfg = "host",
),
"_flock": attr.label(
default = "@com_grail_rules_r//R/scripts:flock",
executable = True,
cfg = "host",
),
"_instrument_R": attr.label(
allow_single_file = True,
default = "@com_grail_rules_r//R/scripts:instrument.R",
),
}

_PKG_ATTRS.update({
# Attributes for `R CMD build`.
_BUILD_ATTRS = {
"srcs": attr.label_list(
allow_files = True,
mandatory = True,
doc = "Source files to be included for building the package",
),
"cc_deps": attr.label_list(
doc = "cc_library dependencies for this package",
),
"build_args": attr.string_list(
default = [
"--no-build-vignettes",
"--no-manual",
],
doc = "Additional arguments to supply to R CMD build",
),
"config_override": attr.label(
allow_single_file = True,
doc = "Replace the package configure script with this file",
),
"roclets": attr.string_list(
doc = ("roclets to run before installing the package. If this is non-empty, " +
"then you must specify roclets_deps as the R package you want to " +
Expand All @@ -667,10 +820,6 @@ _PKG_ATTRS.update({
],
doc = "roxygen2 or devtools dependency for running roclets",
),
"makevars": attr.label(
allow_single_file = True,
doc = "Additional Makevars file supplied as R_MAKEVARS_USER",
),
"inst_files": attr.label_keyed_string_dict(
allow_files = True,
cfg = "target",
Expand All @@ -679,10 +828,6 @@ _PKG_ATTRS.update({
"destination path. For example, '' will bundle the files to the top level " +
"directory, and 'mydir' will bundle all files into a directory mydir.",
),
"build_tools": attr.label_list(
allow_files = True,
doc = "Executables that package build and load will try to find in the system",
),
"metadata": attr.string_dict(
doc = ("Metadata key-value pairs to add to the DESCRIPTION file before building. " +
"When text is enclosed within `{}`, bazel volatile and stable status " +
Expand All @@ -693,47 +838,41 @@ _PKG_ATTRS.update({
default = -1,
doc = "Same behavior as the stamp attribute in cc_binary rule.",
),
"_build_pkg_common_sh": attr.label(
allow_single_file = True,
default = "@com_grail_rules_r//R/scripts:build_pkg_common.sh",
),
"_build_pkg_src_sh": attr.label(
allow_single_file = True,
default = "@com_grail_rules_r//R/scripts:build_pkg_src.sh",
executable = True,
cfg = "host",
),
"_build_pkg_bin_sh": attr.label(
allow_single_file = True,
default = "@com_grail_rules_r//R/scripts:build_pkg_bin.sh",
executable = True,
cfg = "host",
),
"_merge_test_files_sh": attr.label(
allow_single_file = True,
default = "@com_grail_rules_r//R/scripts:merge_test_files.sh",
executable = True,
cfg = "host",
),
"_flock": attr.label(
default = "@com_grail_rules_r//R/scripts:flock",
executable = True,
cfg = "host",
),
"_instrument_R": attr.label(
allow_single_file = True,
default = "@com_grail_rules_r//R/scripts:instrument.R",
),
"_stamp_description_sh": attr.label(
allow_single_file = True,
default = "@com_grail_rules_r//R/scripts:stamp_description.sh",
executable = True,
cfg = "host",
),
}

_PKG_ATTRS = dict(_COMMON_ATTRS)
_PKG_ATTRS.update(_COMMON_BUILD_ATTRS)
_PKG_ATTRS.update(_BUILD_ATTRS)

_SOURCE_PKG_ATTRS = dict(_COMMON_ATTRS)
_SOURCE_PKG_ATTRS.update(_COMMON_BUILD_ATTRS)
_SOURCE_PKG_ATTRS.update({
"src": attr.label(
allow_single_file = True,
mandatory = True,
doc = "Source archive of the package",
),
})

_BINARY_PKG_ATTRS = dict(_COMMON_ATTRS)

_BINARY_PKG_ATTRS.update({
"src": attr.label(
allow_single_file = True,
Expand Down Expand Up @@ -761,6 +900,19 @@ r_pkg = rule(
implementation = _build_impl,
)

r_source_pkg = rule(
attrs = _SOURCE_PKG_ATTRS,
doc = ("Rule to install the package and its transitive dependencies in " +
"the Bazel sandbox from a source archive."),
outputs = {
"bin_archive": "%{name}.bin.tar.gz",
"src_archive": "%{name}.tar.gz",
"so_lib": "%{name}.so",
},
toolchains = ["@com_grail_rules_r//R:toolchain_type"],
implementation = _build_source_pkg_impl,
)

r_binary_pkg = rule(
attrs = _BINARY_PKG_ATTRS,
doc = ("Rule to install the package and its transitive dependencies in " +
Expand Down
9 changes: 7 additions & 2 deletions R/repositories.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ def _r_repository_impl(rctx):

archive_basename = rctx.attr.urls[0].rsplit("/", 1)[1]

if rctx.attr.pkg_type == "source":
extracted = False
if rctx.attr.pkg_type == "source" and rctx.attr.build_file:
extracted = True
rctx.download_and_extract(
rctx.attr.urls,
sha256 = rctx.attr.sha256,
Expand All @@ -54,8 +56,11 @@ def _r_repository_impl(rctx):
return

args = dict(rctx.attr.razel_args)
if rctx.attr.pkg_type == "binary":
if rctx.attr.pkg_type == "source" and not extracted:
args["pkg_src_archive"] = archive_basename
elif rctx.attr.pkg_type == "binary":
args["pkg_bin_archive"] = archive_basename

script_content = """
source({razel})
buildify({args})
Expand Down
21 changes: 21 additions & 0 deletions R/scripts/build_pkg_bin.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,27 @@ symlink_r_libs "${R_LIBS_DEPS//_EXEC_ROOT_/${EXEC_ROOT}/}"

tar -C "${TMP_SRC}" --strip-components=1 -xzf "${PKG_SRC_ARCHIVE}"

if [[ "${DIRECT_FROM_SOURCE:-false}" ]]; then
# Copy over some steps from build_pkg_src.sh that are only needed if we are
# building directly from a source archive, and did not have to run that script
# first.
if [[ "${CONFIG_OVERRIDE:-}" ]]; then
cp "${CONFIG_OVERRIDE}" "${TMP_SRC}/configure"
fi
if [[ "${C_SO_FILES:-}" ]]; then
mkdir -p "${TMP_SRC}/inst/libs"
for so_file in ${C_SO_FILES}; do
eval so_file="${so_file}" # Use eval to remove outermost quotes.
so_file_name="$(basename "${so_file}")"
cp "${so_file}" "${TMP_SRC}/inst/libs/${so_file_name}"
if [[ "$(uname)" == "Darwin" ]]; then
chmod u+w "${TMP_SRC}/inst/libs/${so_file_name}"
install_name_tool -id "@loader_path/${so_file_name}" "${TMP_SRC}/inst/libs/${so_file_name}"
fi
done
fi
fi

# Install the package to the common temp library.
silent "${R}" CMD INSTALL --built-timestamp='' "${INSTALL_ARGS}" --no-lock --build --library="${TMP_LIB}" --clean "${TMP_SRC}"
rm -rf "${PKG_LIB_PATH:?}/${PKG_NAME}" # Delete empty directories to make way for move.
Expand Down
Loading

0 comments on commit dfee2f7

Please sign in to comment.