Skip to content

Commit

Permalink
feat(vcs): Add Git-specific configuration options for submodule handling
Browse files Browse the repository at this point in the history
For large repositories with many layers of nested Git submodules, the
download process can be very time-consuming and often results in
duplicate projects in the tree of nested submodules.
This feature introduces configuration options to limit the recursive
checkout of nested Git submodules to the first layer, optimizing
performance and reducing redundancy. Additionally, it allows limiting
the depth of the commit history that is fetched when downloading
the projects.

Signed-off-by: Wolfgang Klenk <[email protected]>
  • Loading branch information
wkl3nk committed Nov 21, 2024
1 parent d352301 commit c15ce99
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 32 deletions.
5 changes: 3 additions & 2 deletions downloader/src/test/kotlin/VersionControlSystemTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import org.ossreviewtoolkit.model.Package
import org.ossreviewtoolkit.model.VcsInfo
import org.ossreviewtoolkit.model.VcsType
import org.ossreviewtoolkit.plugins.versioncontrolsystems.git.Git
import org.ossreviewtoolkit.plugins.versioncontrolsystems.git.GitConfiguration
import org.ossreviewtoolkit.utils.common.CommandLineTool

class VersionControlSystemTest : WordSpec({
Expand Down Expand Up @@ -87,7 +88,7 @@ class VersionControlSystemTest : WordSpec({

every { workingTree.guessRevisionName(any(), any()) } returns "v1.6.0"

Git.Factory().create(VersionControlSystemConfiguration())
Git.Factory().create(GitConfiguration())
.getRevisionCandidates(workingTree, pkg, allowMovingRevisions = true) shouldBeSuccess listOf(
"v1.6.0"
)
Expand All @@ -111,7 +112,7 @@ class VersionControlSystemTest : WordSpec({
every { workingTree.listRemoteBranches() } returns listOf("main")
every { workingTree.listRemoteTags() } returns emptyList()

Git.Factory().create(VersionControlSystemConfiguration())
Git.Factory().create(GitConfiguration())
.getRevisionCandidates(workingTree, pkg, allowMovingRevisions = true) shouldBeSuccess listOf(
"master",
"main"
Expand Down
86 changes: 59 additions & 27 deletions plugins/version-control-systems/git/src/main/kotlin/Git.kt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import java.security.PublicKey

import org.apache.logging.log4j.kotlin.logger

import org.eclipse.jgit.api.Git
import org.eclipse.jgit.api.Git as JGit
import org.eclipse.jgit.api.LsRemoteCommand
import org.eclipse.jgit.api.errors.GitAPIException
import org.eclipse.jgit.errors.UnsupportedCredentialItem
Expand All @@ -45,7 +45,6 @@ import org.eclipse.jgit.transport.sshd.ServerKeyDatabase
import org.eclipse.jgit.transport.sshd.SshdSessionFactory

import org.ossreviewtoolkit.downloader.VersionControlSystem
import org.ossreviewtoolkit.downloader.VersionControlSystemConfiguration
import org.ossreviewtoolkit.downloader.VersionControlSystemFactory
import org.ossreviewtoolkit.downloader.WorkingTree
import org.ossreviewtoolkit.model.VcsInfo
Expand All @@ -62,9 +61,6 @@ import org.ossreviewtoolkit.utils.ort.showStackTrace
import org.semver4j.RangesList
import org.semver4j.RangesListFactory

// TODO: Make this configurable.
const val GIT_HISTORY_DEPTH = 50

// Replace prefixes of Git submodule repository URLs.
private val REPOSITORY_URL_PREFIX_REPLACEMENTS = listOf(
"git://" to "https://"
Expand All @@ -87,8 +83,10 @@ object GitCommand : CommandLineTool {
override fun displayName(): String = "Git"
}

class Git : VersionControlSystem(GitCommand) {
class Git internal constructor(private val vcsConfig: GitConfiguration = GitConfiguration()) :
VersionControlSystem(GitCommand) {
companion object {

init {
// Make sure that JGit uses the exact same authentication information as ORT itself. This addresses
// discrepancies in the way .netrc files are interpreted between JGit's and ORT's implementation.
Expand Down Expand Up @@ -122,20 +120,24 @@ class Git : VersionControlSystem(GitCommand) {
override val type = VcsType.GIT.toString()
override val latestRevisionNames = listOf("HEAD", "@")

class Factory : VersionControlSystemFactory<VersionControlSystemConfiguration>(VcsType.GIT.toString(), 100) {
override fun create(config: VersionControlSystemConfiguration): VersionControlSystem {
return Git()
class Factory : VersionControlSystemFactory<GitConfiguration>(VcsType.GIT.toString(), 100) {
override fun create(config: GitConfiguration): VersionControlSystem {
logger.info("Creating Git VCS with configuration $config.")
return Git(config)
}

override fun parseConfig(options: Options, secrets: Options): VersionControlSystemConfiguration {
return VersionControlSystemConfiguration() // No specific Subversion configuration yet.
override fun parseConfig(options: Options, secrets: Options): GitConfiguration {
return GitConfiguration(
submoduleHistoryDepth = options["submoduleHistoryDepth"]?.toIntOrNull() ?: 50,
updateNestedSubmodules = options["updateNestedSubmodules"]?.toBoolean() ?: true
)
}
}

override fun getVersion() = GitCommand.getVersion(null)

override fun getDefaultBranchName(url: String): String {
val refs = Git.lsRemoteRepository().setRemote(url).callAsMap()
val refs = JGit.lsRemoteRepository().setRemote(url).callAsMap()
return (refs["HEAD"] as? SymbolicRef)?.target?.name?.removePrefix("refs/heads/") ?: "master"
}

Expand All @@ -150,7 +152,7 @@ class Git : VersionControlSystem(GitCommand) {

override fun initWorkingTree(targetDir: File, vcs: VcsInfo): WorkingTree {
try {
Git.init().setDirectory(targetDir).call().use { git ->
JGit.init().setDirectory(targetDir).call().use { git ->
git.remoteAdd().setName("origin").setUri(URIish(vcs.url)).call()

if (Os.isWindows) {
Expand Down Expand Up @@ -185,12 +187,21 @@ class Git : VersionControlSystem(GitCommand) {
recursive: Boolean
): Result<String> =
(workingTree as GitWorkingTree).useRepo {
Git(this).use { git ->
JGit(this).use { git ->
logger.info { "Updating working tree from ${workingTree.getRemoteUrl()}." }

updateWorkingTreeWithoutSubmodules(workingTree, git, revision).mapCatching {
val historyDepth = vcsConfig.submoduleHistoryDepth
updateWorkingTreeWithoutSubmodules(workingTree, git, revision, historyDepth).mapCatching {
// In case this throws the exception gets encapsulated as a failure.
if (recursive) updateSubmodules(workingTree)
if (recursive) {
val updateNestedSubmodules =
vcsConfig.updateNestedSubmodules
updateSubmodules(
workingTree,
recursive = updateNestedSubmodules,
historyDepth = historyDepth
)
}

revision
}
Expand All @@ -199,13 +210,14 @@ class Git : VersionControlSystem(GitCommand) {

private fun updateWorkingTreeWithoutSubmodules(
workingTree: WorkingTree,
git: Git,
revision: String
git: JGit,
revision: String,
historyDepth: Int
): Result<String> =
runCatching {
logger.info { "Trying to fetch only revision '$revision' with depth limited to $GIT_HISTORY_DEPTH." }
logger.info { "Trying to fetch only revision '$revision' with depth limited to $historyDepth." }

val fetch = git.fetch().setDepth(GIT_HISTORY_DEPTH)
val fetch = git.fetch().setDepth(historyDepth)

// See https://git-scm.com/docs/gitrevisions#_specifying_revisions for how Git resolves ambiguous
// names. In particular, tag names have higher precedence than branch names.
Expand All @@ -223,13 +235,13 @@ class Git : VersionControlSystem(GitCommand) {
it.showStackTrace()

logger.info { "Could not fetch only revision '$revision': ${it.collectMessages()}" }
logger.info { "Falling back to fetching all refs with depth limited to $GIT_HISTORY_DEPTH." }
logger.info { "Falling back to fetching all refs with depth limited to $historyDepth." }

git.fetch().setDepth(GIT_HISTORY_DEPTH).setTagOpt(TagOpt.FETCH_TAGS).call()
git.fetch().setDepth(historyDepth).setTagOpt(TagOpt.FETCH_TAGS).call()
}.recoverCatching {
it.showStackTrace()

logger.info { "Could not fetch with only a depth of $GIT_HISTORY_DEPTH: ${it.collectMessages()}" }
logger.info { "Could not fetch with only a depth of $historyDepth: ${it.collectMessages()}" }
logger.info { "Falling back to fetch everything including tags." }

git.fetch().setUnshallow(true).setTagOpt(TagOpt.FETCH_TAGS).call()
Expand Down Expand Up @@ -284,7 +296,14 @@ class Git : VersionControlSystem(GitCommand) {
revision
}

private fun updateSubmodules(workingTree: WorkingTree) {
/**
* Initialize, update, and clone all the submodules in a working tree.
*
* If [recursive] is set to true, then the operations are not only performed on the
* submodules in the top-level of the working tree, but also on the submodules of the submodules, and so on.
* If [recursive] is set to false, only the submodules on the top-level are initialized, updated, and cloned.
*/
private fun updateSubmodules(workingTree: WorkingTree, recursive: Boolean, historyDepth: Int) {
if (!workingTree.getRootPath().resolve(".gitmodules").isFile) return

val insteadOf = REPOSITORY_URL_PREFIX_REPLACEMENTS.map { (prefix, replacement) ->
Expand All @@ -293,14 +312,27 @@ class Git : VersionControlSystem(GitCommand) {

runCatching {
// TODO: Migrate this to JGit once https://bugs.eclipse.org/bugs/show_bug.cgi?id=580731 is implemented.
workingTree.runGit("submodule", "update", "--init", "--recursive", "--depth", "$GIT_HISTORY_DEPTH")
val updateArgs = mutableListOf("submodule", "update", "--init", "--depth", "$historyDepth").apply {
if (recursive) { add("--recursive") }
}

workingTree.runGit(*updateArgs.toTypedArray())

insteadOf.forEach {
workingTree.runGit("submodule", "foreach", "--recursive", "git config $it")
val foreachArgs = mutableListOf("submodule", "foreach").apply {
if (recursive) { add("--recursive") }
add("git config $it")
}

workingTree.runGit(*foreachArgs.toTypedArray())
}
}.recover {
// As Git's dumb HTTP transport does not support shallow capabilities, also try to not limit the depth.
workingTree.runGit("submodule", "update", "--recursive")
val fallbackArgs = mutableListOf("submodule", "update").apply {
if (recursive) { add("--recursive") }
}

workingTree.runGit(*fallbackArgs.toTypedArray())
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Copyright (C) 2024 The ORT Project Authors (see <https://github.com/oss-review-toolkit/ort/blob/main/NOTICE>)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
* License-Filename: LICENSE
*/
package org.ossreviewtoolkit.plugins.versioncontrolsystems.git

import org.ossreviewtoolkit.downloader.VersionControlSystemConfiguration

/**
* Git-specific configuration options for the [Git] version control system plugin. `Git.Factory.parseConfig()` builds
* an instance from the plugin options of the same names, and the defaults declared here are also used when this class
* is instantiated without arguments.
*/
data class GitConfiguration(
/**
* Depth of the commit history to fetch. Despite the "submodule" prefix in its name, [Git] applies this depth to the
* fetch of the top-level working tree as well as to the `git submodule update` call for its submodules.
*/
val submoduleHistoryDepth: Int = 50,

/**
* Whether nested submodules should be updated, or if only the submodules
* on the first layer should be considered. This flag is only consulted when a recursive update of the working tree
* was requested; otherwise no submodules are updated at all.
*/
val updateNestedSubmodules: Boolean = true
) : VersionControlSystemConfiguration()
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ class GitRepo internal constructor() : VersionControlSystem(GitRepoCommand) {

paths.forEach { path ->
// Add the nested Repo project.
val workingTree = Git.Factory().create(VersionControlSystemConfiguration())
val workingTree = Git.Factory().create(GitConfiguration())
.getWorkingTree(getRootPath().resolve(path))
nested[path] = workingTree.getInfo()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ import io.kotest.matchers.shouldBe

import java.io.IOException

import org.ossreviewtoolkit.downloader.VersionControlSystemConfiguration
import org.ossreviewtoolkit.model.Package
import org.ossreviewtoolkit.model.VcsInfo
import org.ossreviewtoolkit.model.VcsType
import org.ossreviewtoolkit.plugins.versioncontrolsystems.git.Git
import org.ossreviewtoolkit.plugins.versioncontrolsystems.git.GitConfiguration

class SafeDeleteRecursivelyFunTest : WordSpec({
"File.safeDeleteRecursively()" should {
Expand Down Expand Up @@ -59,7 +59,7 @@ class SafeDeleteRecursivelyFunTest : WordSpec({
)

val nodeDir = tempdir().resolve("node-dir")
Git.Factory().create(VersionControlSystemConfiguration())
Git.Factory().create(GitConfiguration())
.download(pkg, nodeDir)

shouldNotThrow<IOException> {
Expand Down

0 comments on commit c15ce99

Please sign in to comment.