Skip to content

Commit

Permalink
refactor(gradle-inspector): Migrate the code to use the dependency graph
Browse files Browse the repository at this point in the history
See [1] for context.

[1]: #3825

Signed-off-by: Sebastian Schuberth <sebastian@doubleopen.org>
  • Loading branch information
sschuberth committed Sep 4, 2024
1 parent 0e3900d commit 080b303
Show file tree
Hide file tree
Showing 3 changed files with 289 additions and 253 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,7 @@ project:
revision: "<REPLACE_REVISION>"
path: "plugins/package-managers/gradle/src/funTest/assets/projects/synthetic/gradle/lib-without-repo"
homepage_url: ""
scopes:
- name: "compileClasspath"
dependencies: []
- name: "runtimeClasspath"
dependencies: []
- name: "testCompileClasspath"
dependencies: []
- name: "testRuntimeClasspath"
dependencies: []
scopes: []
packages: []
issues:
- timestamp: "1970-01-01T00:00:00Z"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,271 @@
/*
* Copyright (C) 2024 The ORT Project Authors (see <https://github.com/oss-review-toolkit/ort/blob/main/NOTICE>)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
* License-Filename: LICENSE
*/

package org.ossreviewtoolkit.plugins.packagemanagers.gradleinspector

import OrtDependency

import java.lang.invoke.MethodHandles

import org.apache.logging.log4j.kotlin.logger
import org.apache.logging.log4j.kotlin.loggerOf

import org.ossreviewtoolkit.analyzer.PackageManager.Companion.processPackageVcs
import org.ossreviewtoolkit.downloader.VcsHost
import org.ossreviewtoolkit.model.Hash
import org.ossreviewtoolkit.model.HashAlgorithm
import org.ossreviewtoolkit.model.Identifier
import org.ossreviewtoolkit.model.Issue
import org.ossreviewtoolkit.model.Package
import org.ossreviewtoolkit.model.PackageLinkage
import org.ossreviewtoolkit.model.RemoteArtifact
import org.ossreviewtoolkit.model.VcsInfo
import org.ossreviewtoolkit.model.VcsType
import org.ossreviewtoolkit.model.createAndLogIssue
import org.ossreviewtoolkit.model.orEmpty
import org.ossreviewtoolkit.model.utils.DependencyHandler
import org.ossreviewtoolkit.model.utils.parseRepoManifestPath
import org.ossreviewtoolkit.plugins.packagemanagers.gradlemodel.dependencyType
import org.ossreviewtoolkit.plugins.packagemanagers.gradlemodel.isProjectDependency
import org.ossreviewtoolkit.utils.common.splitOnWhitespace
import org.ossreviewtoolkit.utils.common.withoutPrefix
import org.ossreviewtoolkit.utils.ort.DeclaredLicenseProcessor
import org.ossreviewtoolkit.utils.ort.downloadText
import org.ossreviewtoolkit.utils.ort.okHttpClient
import org.ossreviewtoolkit.utils.spdx.SpdxOperator

/**
 * A [DependencyHandler] for Gradle's dependency model. It maps [OrtDependency] instances to identifiers, child
 * dependencies, linkage information and [Package] metadata.
 */
internal class GradleDependencyHandler : DependencyHandler<OrtDependency> {
    /** Build the [Identifier] from the dependency's type, group ID, artifact ID and version. */
    override fun identifierFor(dependency: OrtDependency): Identifier =
        Identifier(dependency.dependencyType, dependency.groupId, dependency.artifactId, dependency.version)

    /** Return the direct child dependencies as recorded in the [dependency] itself. */
    override fun dependenciesFor(dependency: OrtDependency): List<OrtDependency> = dependency.dependencies

    /** Project dependencies are linked as [PackageLinkage.PROJECT_DYNAMIC], all others as [PackageLinkage.DYNAMIC]. */
    override fun linkageFor(dependency: OrtDependency): PackageLinkage =
        when {
            dependency.isProjectDependency -> PackageLinkage.PROJECT_DYNAMIC
            else -> PackageLinkage.DYNAMIC
        }

    /**
     * Create the [Package] for the given [dependency]. Return null if the dependency has a resolution error, is a
     * project dependency, or has no Maven model. In the latter case an issue is added to [issues].
     */
    override fun createPackage(dependency: OrtDependency, issues: MutableCollection<Issue>): Package? {
        // Neither unresolved dependencies nor project dependencies are represented as packages.
        if (dependency.error != null) return null
        if (dependency.isProjectDependency) return null

        val id = identifierFor(dependency)

        val model = dependency.mavenModel
        if (model == null) {
            issues += createAndLogIssue(
                source = "Gradle",
                message = "No Maven model available for '${id.toCoordinates()}'."
            )

            return null
        }

        // Spring Boot / Cloud starters and contract specs are treated as metadata-only, just like plain POM artifacts.
        val isSpringMetadataProject = listOf("boot", "cloud").any { flavor ->
            val isSpringNamespace = id.namespace == "org.springframework.$flavor"
            val isStarter = id.name.startsWith("spring-$flavor-starter")

            isSpringNamespace && (isStarter || id.name.startsWith("spring-$flavor-contract-spec"))
        }

        val isMetadataOnly = dependency.extension == "pom" || isSpringMetadataProject

        // Metadata-only packages get empty artifacts, so no artifact lookup is triggered for them.
        val binaryArtifact = if (isMetadataOnly) {
            RemoteArtifact.EMPTY
        } else {
            // The "bundle" extension is dropped here, so the artifact lookup uses its default extension instead.
            val binaryExtension = dependency.extension.takeUnless { it == "bundle" }
            createRemoteArtifact(dependency.pomFile, dependency.classifier, binaryExtension)
        }

        val sourceArtifact = if (isMetadataOnly) {
            RemoteArtifact.EMPTY
        } else {
            createRemoteArtifact(dependency.pomFile, "sources", "jar")
        }

        val vcs = dependency.toVcsInfo()
        val fallbackUrls = listOfNotNull(model.vcs?.browsableUrl, model.homepageUrl).toTypedArray()
        val vcsProcessed = processPackageVcs(vcs, *fallbackUrls)

        // See http://maven.apache.org/ref/3.6.3/maven-model/maven.html#project saying: "If multiple licenses are
        // listed, it is assumed that the user can select any of them, not that they must accept all."
        val declaredLicensesProcessed = DeclaredLicenseProcessor.process(model.licenses, operator = SpdxOperator.OR)

        return Package(
            id = id,
            authors = model.authors,
            declaredLicenses = model.licenses,
            declaredLicensesProcessed = declaredLicensesProcessed,
            description = model.description.orEmpty(),
            homepageUrl = model.homepageUrl.orEmpty(),
            binaryArtifact = binaryArtifact,
            sourceArtifact = sourceArtifact,
            vcs = vcs,
            vcsProcessed = vcsProcessed,
            isMetadataOnly = isMetadataOnly
        )
    }
}

// Matches a complete Maven SCM connection of the form "scm:<type>:<url>", see http://maven.apache.org/pom.html#SCM.
// The "type" group must contain neither ':' nor '@', the "url" group captures the non-empty remainder.
private val SCM_REGEX = Regex("scm:(?<type>[^:@]+):(?<url>.+)")
// Matches connections of the form "scm:<user>@<host>" followed by ':' or '/' and a non-empty path, which is the shape
// of connections that omit the type and thus do not match SCM_REGEX.
private val USER_HOST_REGEX = Regex("scm:(?<user>[^:@]+)@(?<host>[^:]+)[:/](?<path>.+)")

// The logger used by the top-level functions in this file. The class to log for is obtained from the method handles
// lookup as there is no enclosing class to refer to.
private val logger = loggerOf(MethodHandles.lookup().lookupClass())

/**
 * Derive the [VcsInfo] from the SCM section of the dependency's Maven model. Connections matching [SCM_REGEX] are
 * passed to [handleValidScmInfo], all others to [handleInvalidScmInfo]. If there is no Maven model or no SCM section,
 * the result of [orEmpty] on null is returned.
 */
private fun OrtDependency.toVcsInfo(): VcsInfo {
    val vcsInfo = mavenModel?.vcs?.let { scm ->
        val match = SCM_REGEX.matchEntire(scm.connection)

        if (match == null) {
            handleInvalidScmInfo(scm.connection, scm.tag)
        } else {
            // Both groups are mandatory parts of the pattern, so they are always present on a match.
            val (type, url) = match.destructured
            handleValidScmInfo(type, url, scm.tag)
        }
    }

    return vcsInfo.orEmpty()
}

/**
 * Create a [VcsInfo] from the [type] and [url] of a Maven SCM connection that matched the "scm:<type>:<url>" syntax.
 * The [tag] is used as the revision, except when the [VcsInfo] can be taken from a [VcsHost] recognized for the URL.
 */
private fun OrtDependency.handleValidScmInfo(type: String, url: String, tag: String): VcsInfo {
    // Maven does not officially support git-repo as an SCM, see http://maven.apache.org/scm/scms-overview.html, so
    // come up with the convention to use the "manifest" query parameter for the path to the manifest inside the
    // repository. An earlier version of this workaround expected the query string to be only the path to the
    // manifest, for backward compatibility convert such URLs to the new syntax.
    if (type == "git-repo") {
        val manifestPath = url.parseRepoManifestPath()
            ?: url.substringAfter('?').takeIf { it.isNotBlank() && it.endsWith(".xml") }

        val urlWithManifest = if (manifestPath == null) url else "${url.substringBefore('?')}?manifest=$manifestPath"

        return VcsInfo(type = VcsType.GIT_REPO, url = urlWithManifest, revision = tag)
    }

    if (type == "svn") {
        // A non-empty tag is turned into a path below "tags", an empty tag is kept as is.
        val revision = if (tag.isEmpty()) tag else "tags/$tag"

        return VcsInfo(type = VcsType.SUBVERSION, url = url, revision = revision)
    }

    if (url.startsWith("//")) {
        // Work around the common mistake to omit the Maven SCM provider: What was parsed as the type is actually the
        // URL scheme, so put the URL back together.
        val fixedUrl = "$type:$url"

        // Try to detect the Maven SCM provider from the URL only, e.g. by looking at the host or special URL paths.
        return VcsHost.parseUrl(fixedUrl).copy(revision = tag).also {
            logger.info {
                "Fixed up invalid SCM connection without a provider in '$groupId:$artifactId:$version' to $it."
            }
        }
    }

    // Strip a leading "git:" unless it is part of the "git://" scheme.
    val trimmedUrl = if (url.startsWith("git://")) url else url.removePrefix("git:")

    val hostVcsInfo = VcsHost.fromUrl(trimmedUrl)?.let { host ->
        val vcsInfo = host.toVcsInfo(trimmedUrl) ?: return@let null

        // Fixup paths that are specified as part of the URL and contain the project name as a prefix.
        val projectPrefix = "${host.getProject(trimmedUrl)}-"
        val fixedPath = vcsInfo.path.withoutPrefix(projectPrefix) ?: return@let null

        vcsInfo.copy(path = fixedPath)
    }

    // If any of the steps above yielded null, fall back to using the type, URL and tag as they are.
    return hostVcsInfo ?: VcsInfo(type = VcsType.forName(type), url = trimmedUrl, revision = tag)
}

/**
 * Try to create a [VcsInfo] from a Maven SCM [connection] that does not match the "scm:<type>:<url>" syntax, using
 * [tag] as the revision. Return [VcsInfo.EMPTY] if the [connection] matches none of the handled fall-back cases.
 */
private fun OrtDependency.handleInvalidScmInfo(connection: String, tag: String): VcsInfo {
    val match = USER_HOST_REGEX.matchEntire(connection)

    if (match != null) {
        // Some projects omit the provider and use the SCP-like Git URL syntax, for example
        // "scm:git@github.com:facebook/facebook-android-sdk.git".
        val (user, host, path) = match.destructured
        val looksLikeGit = user == "git" || host.startsWith("git")

        return if (looksLikeGit) {
            VcsInfo(type = VcsType.GIT, url = "https://$host/$path", revision = tag)
        } else {
            VcsInfo.EMPTY
        }
    }

    val dep = "$groupId:$artifactId:$version"

    return when {
        // It is a common mistake to omit the "scm:[provider]:" prefix. Add fall-backs for nevertheless clear cases.
        connection.startsWith("git://") || connection.endsWith(".git") -> {
            logger.info {
                "Maven SCM connection '$connection' in '$dep' lacks the required 'scm' prefix."
            }

            VcsInfo(type = VcsType.GIT, url = connection, revision = tag)
        }

        else -> {
            // Only log if there actually is a connection that gets ignored.
            if (connection.isNotEmpty()) {
                logger.info {
                    "Ignoring Maven SCM connection '$connection' in '$dep' due to an unexpected format."
                }
            }

            VcsInfo.EMPTY
        }
    }
}

/**
 * Create a [RemoteArtifact] for the artifact that is located next to the POM at [pomUrl] and that is identified by the
 * optional [classifier] and [extension], where a null or empty [extension] defaults to "jar". The SHA-1 hash value is
 * retrieved remotely. Return [RemoteArtifact.EMPTY] if [pomUrl] is null, if the checksum cannot be downloaded, or if
 * the hash is the one of an empty file.
 */
private fun createRemoteArtifact(
    pomUrl: String?,
    classifier: String? = null,
    extension: String? = null
): RemoteArtifact {
    if (pomUrl == null) return RemoteArtifact.EMPTY

    val algorithm = "sha1"

    val classifierPart = if (classifier.isNullOrEmpty()) "" else "-$classifier"
    val extensionPart = if (extension.isNullOrEmpty()) ".jar" else ".$extension"
    val artifactUrl = pomUrl.removeSuffix(".pom") + classifierPart + extensionPart

    // TODO: How to handle authentication for private repositories here, or rely on Gradle for the download?
    val checksumResult = okHttpClient.downloadText("$artifactUrl.$algorithm")
    val checksum = checksumResult.getOrElse { return RemoteArtifact.EMPTY }

    val hash = parseChecksum(checksum, algorithm)

    // Ignore file with zero byte size, because it cannot be a valid archive.
    return if (hash.value == HashAlgorithm.SHA1.emptyValue) {
        logger.info { "Ignoring zero byte size artifact: $artifactUrl" }
        RemoteArtifact.EMPTY
    } else {
        RemoteArtifact(artifactUrl, hash)
    }
}

/**
 * Return a [Hash] for the first whitespace-separated element of [checksum] from which a [Hash] for the given
 * [algorithm] can be created without an exception being thrown, or [Hash.NONE] if there is no such element. This works
 * around the issue that Maven checksum files sometimes contain arbitrary strings before or after the actual checksum.
 */
private fun parseChecksum(checksum: String, algorithm: String): Hash {
    for (candidate in checksum.splitOnWhitespace()) {
        // Elements that cannot be turned into a hash are skipped.
        val hash = runCatching { Hash(candidate, algorithm) }.getOrNull()
        if (hash != null) return hash
    }

    return Hash.NONE
}
Loading

0 comments on commit 080b303

Please sign in to comment.