Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Misc. SPDX / license mapping related improvements #8730

Merged
merged 7 commits into from
Jun 18, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ import org.ossreviewtoolkit.utils.common.Options
import org.ossreviewtoolkit.utils.common.collectMessages
import org.ossreviewtoolkit.utils.ort.OkHttpClientHelper
import org.ossreviewtoolkit.utils.ort.showStackTrace
import org.ossreviewtoolkit.utils.spdx.SpdxExpression
import org.ossreviewtoolkit.utils.spdx.toSpdx
import org.ossreviewtoolkit.utils.spdx.SpdxExpression.Strictness
import org.ossreviewtoolkit.utils.spdx.toSpdxOrNull

import retrofit2.HttpException

Expand Down Expand Up @@ -149,12 +149,9 @@ class ClearlyDefinedPackageCurationProvider(
filteredCurations.forEach inner@{ (coordinates, curation) ->
val pkgId = coordinatesToIds[coordinates] ?: return@inner

val declaredLicenseParsed = curation.licensed?.declared?.let { declaredLicense ->
// Only take curations of good quality (i.e. those not using deprecated identifiers) and in
// particular none that contain "OTHER" as a license, also see
// https://github.com/clearlydefined/curated-data/issues/7836.
runCatching { declaredLicense.toSpdx(SpdxExpression.Strictness.ALLOW_CURRENT) }.getOrNull()
}
// Only take curations of good quality (i.e. those not using deprecated identifiers) and in particular none
// that contain "OTHER" as a license, also see https://github.com/clearlydefined/curated-data/issues/7836.
val declaredLicenseParsed = curation.licensed?.declared?.toSpdxOrNull(Strictness.ALLOW_CURRENT)

val sourceLocation = curation.described?.sourceLocation?.toArtifactOrVcs()

Expand Down
16 changes: 3 additions & 13 deletions utils/ort/src/main/kotlin/DeclaredLicenseProcessor.kt
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,14 @@ import com.fasterxml.jackson.annotation.JsonInclude
import com.fasterxml.jackson.annotation.JsonPropertyOrder
import com.fasterxml.jackson.databind.annotation.JsonSerialize

import org.apache.logging.log4j.kotlin.logger

import org.ossreviewtoolkit.utils.common.StringSortedSetConverter
import org.ossreviewtoolkit.utils.common.collectMessages
import org.ossreviewtoolkit.utils.common.unquote
import org.ossreviewtoolkit.utils.spdx.SpdxCompoundExpression
import org.ossreviewtoolkit.utils.spdx.SpdxConstants
import org.ossreviewtoolkit.utils.spdx.SpdxDeclaredLicenseMapping
import org.ossreviewtoolkit.utils.spdx.SpdxExpression
import org.ossreviewtoolkit.utils.spdx.SpdxOperator
import org.ossreviewtoolkit.utils.spdx.toSpdx
import org.ossreviewtoolkit.utils.spdx.toSpdxOrNull

object DeclaredLicenseProcessor {
private val urlPrefixesToRemove = listOf(
Expand Down Expand Up @@ -92,9 +89,9 @@ object DeclaredLicenseProcessor {
?: SpdxDeclaredLicenseMapping.map(strippedLicense)
?: SpdxDeclaredLicenseMapping.map(strippedLicense.unquote())
?: SpdxDeclaredLicenseMapping.map(strippedLicense.removePrefix(SpdxConstants.TAG).trim())
?: parseLicense(strippedLicense)

return mappedLicense?.normalize()?.takeIf { it.isValid() || it.toString() == SpdxConstants.NONE }
val processedLicense = mappedLicense ?: strippedLicense.toSpdxOrNull()
return processedLicense?.normalize()?.takeIf { it.isValid() || it.toString() == SpdxConstants.NONE }
}

/**
Expand Down Expand Up @@ -133,13 +130,6 @@ object DeclaredLicenseProcessor {

return ProcessedDeclaredLicense(spdxExpression, mapped, unmapped)
}

/**
 * Try to parse [declaredLicense] as an SPDX expression via [toSpdx]. Return the parsed expression on success. If
 * parsing throws, log the collected failure messages at debug level and return null instead.
 */
private fun parseLicense(declaredLicense: String): SpdxExpression? {
    val parseResult = runCatching { declaredLicense.toSpdx() }

    // A failed parse is not propagated to the caller; it is only reported at debug level.
    parseResult.onFailure {
        logger.debug { "Could not parse declared license '$declaredLicense': ${it.collectMessages()}" }
    }

    return parseResult.getOrNull()
}
}

data class ProcessedDeclaredLicense(
Expand Down
22 changes: 21 additions & 1 deletion utils/spdx/src/main/kotlin/Extensions.kt
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,19 @@
* License-Filename: LICENSE
*/

@file:Suppress("TooManyFunctions")

package org.ossreviewtoolkit.utils.spdx

import java.lang.invoke.MethodHandles

import org.apache.logging.log4j.kotlin.loggerOf

import org.ossreviewtoolkit.utils.common.collectMessages
import org.ossreviewtoolkit.utils.spdx.SpdxExpression.Strictness

private val logger = loggerOf(MethodHandles.lookup().lookupClass())

/**
* Create an [SpdxExpression] by concatenating [this][SpdxLicense] and [other] using [SpdxOperator.AND].
*/
Expand Down Expand Up @@ -79,12 +88,23 @@ fun String.isSpdxExpressionOrNotPresent(strictness: Strictness = Strictness.ALLO
SpdxConstants.isNotPresent(this) || isSpdxExpression(strictness)

/**
* Parses the string as an [SpdxExpression] of the given [strictness] and returns the result on success, or throws an
* Parse this string as an [SpdxExpression] of the given [strictness] and return the result on success, or throw an
* [SpdxException] if the string cannot be parsed.
*/
fun String.toSpdx(strictness: Strictness = Strictness.ALLOW_ANY): SpdxExpression {
    // Delegate directly to the parser; nothing is caught here, so a parse failure reaches the caller as an exception.
    return SpdxExpression.parse(this, strictness)
}

/**
* Parse this string as an [SpdxExpression] of the given [strictness] and return the result on success, or null if this
* string cannot be parsed.
*/
fun String.toSpdxOrNull(strictness: Strictness = Strictness.ALLOW_ANY): SpdxExpression? =
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Parses -> Parse
returns -> return
maybe: the string -> this string

runCatching {
toSpdx(strictness)
}.onFailure {
logger.debug { "Could not parse '$this' as an SPDX license: ${it.collectMessages()}" }
}.getOrNull()

/**
* Convert a [String] to an SPDX "idstring" (like license IDs, package IDs, etc.) which may only contain letters,
* numbers, ".", and / or "-". If [allowPlusSuffix] is enabled, a "+" (as used in license IDs) is kept as the suffix.
Expand Down
7 changes: 3 additions & 4 deletions utils/spdx/src/main/kotlin/SpdxExpression.kt
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,9 @@ sealed class SpdxExpression {
}

/**
* Normalize all license IDs using a mapping containing common misspellings of license IDs. If [mapDeprecated] is
* `true`, also deprecated IDs are mapped to their current counterparts. The result of this function is not
* guaranteed to contain only valid IDs. Use [validate] to check the returned [SpdxExpression] for validity
* afterwards.
* Normalize all license IDs using [SpdxSimpleLicenseMapping]. If [mapDeprecated] is `true`, also deprecated IDs are
* mapped to their current counterparts. The result of this function is not guaranteed to contain only valid IDs.
* Use [validate] or [isValid] to check the returned [SpdxExpression] for validity afterwards.
*/
abstract fun normalize(mapDeprecated: Boolean = true): SpdxExpression

Expand Down
5 changes: 2 additions & 3 deletions utils/spdx/src/main/kotlin/SpdxSimpleLicenseMapping.kt
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,8 @@ import com.fasterxml.jackson.module.kotlin.readValue

/**
* A mapping from simple license names to valid SPDX license IDs. This mapping only contains license strings which *can*
* be parsed by [SpdxExpression.parse] but have a corresponding valid SPDX license ID that should be used instead. When
* mapping a name without any indication of a version to an ID with a version, the most commonly used version at the
* time of writing is used. See [SpdxDeclaredLicenseMapping] for a mapping of unparsable license strings.
* be parsed by [SpdxExpression.parse] but have a corresponding valid SPDX license ID that should be used instead. See
* [SpdxDeclaredLicenseMapping] for a mapping of unparsable license strings.
*/
object SpdxSimpleLicenseMapping {
/**
Expand Down
4 changes: 4 additions & 0 deletions utils/spdx/src/main/resources/declared-license-mapping.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
# SPDX-License-Identifier: Apache-2.0
# License-Filename: LICENSE

# A mapping from license strings collected from the declared licenses of Open Source packages to SPDX expressions. This
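# mapping only contains license strings which can *not* be parsed by [SpdxExpression.parse], for example because the
# license names contain whitespace. See [SpdxSimpleLicenseMapping] for a mapping of varied license names.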
# license names contain white spaces. See [SpdxSimpleLicenseMapping] for a mapping of varied license names.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should it be various instead of varied?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, "varied" is correct (and also used in the original text that I copied this from) as it's supposed to mean "variants of license names" and not "several license names".


# Sort the entries below via IntelliJ's "Edit" -> "Sort Lines".
# Map a declared license string to "NONE" in order to discard it.
---
Expand Down
4 changes: 4 additions & 0 deletions utils/spdx/src/main/resources/simple-license-mapping.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
# SPDX-License-Identifier: Apache-2.0
# License-Filename: LICENSE

# A mapping from simple license names to valid SPDX license IDs. This mapping only contains license strings which *can*
# be parsed by [SpdxExpression.parse] but have a corresponding valid SPDX license ID that should be used instead. See
# [SpdxDeclaredLicenseMapping] for a mapping of unparsable license strings.

# Sort the entries below via IntelliJ's "Edit" -> "Sort Lines".
---

Expand Down
Loading