oss-review-toolkit · sschuberth · Jan 29, 2024 · Jan 24, 2024 · Jan 21, 2024 · Jan 21, 2024
@@ -229,6 +229,9 @@ ort:
           # Command line options that do not affect the ScanCode output.
           commandLineNonConfig: '--processes 4'
 
+          # Use per-file license findings instead of per-line ones.
+          preferFileLicense: false
+
           # Criteria for matching stored scan results. These can be configured for any scanner that uses semantic
           # versioning. Note that the 'maxVersion' is exclusive and not part of the range of accepted versions.
           minVersion: '3.2.1-rc2'

@@ -255,6 +255,7 @@ class OrtConfigurationTest : WordSpec({
                         options shouldContainExactly mapOf(
                             "commandLine" to "--copyright --license --info --strip-root --timeout 300",
                             "commandLineNonConfig" to "--processes 4",
+                            "preferFileLicense" to "false",
                             "minVersion" to "3.2.1-rc2",
                             "maxVersion" to "32.0.0"
                         )

@@ -33,7 +33,7 @@ import org.ossreviewtoolkit.utils.spdx.getLicenseText
 import org.ossreviewtoolkit.utils.test.ExpensiveTag
 
 class ScanCodeScannerFunTest : AbstractPathScannerWrapperFunTest(setOf(ExpensiveTag)) {
-    override val scanner = ScanCode("ScanCode", ScanCodeConfig.EMPTY, ScannerWrapperConfig.EMPTY)
+    override val scanner = ScanCode("ScanCode", ScanCodeConfig.DEFAULT, ScannerWrapperConfig.EMPTY)
 
     override val expectedFileLicenses = listOf(
         LicenseFinding("Apache-2.0", TextLocation("LICENSE", 1, 187), 100.0f),

@@ -22,8 +22,6 @@ package org.ossreviewtoolkit.plugins.scanners.scancode
 import java.io.File
 import java.time.Instant
 
-import kotlin.math.max
-
 import org.apache.logging.log4j.kotlin.logger
 
 import org.ossreviewtoolkit.model.ScanSummary
@@ -40,7 +38,6 @@ import org.ossreviewtoolkit.utils.common.Options
 import org.ossreviewtoolkit.utils.common.Os
 import org.ossreviewtoolkit.utils.common.ProcessCapture
 import org.ossreviewtoolkit.utils.common.safeDeleteRecursively
-import org.ossreviewtoolkit.utils.common.splitOnWhitespace
 import org.ossreviewtoolkit.utils.common.withoutPrefix
 import org.ossreviewtoolkit.utils.ort.createOrtTempDir
 
@@ -55,49 +52,30 @@ import org.semver4j.Semver
  * configuration [options][PluginConfiguration.options]:
  *
  * * **"commandLine":** Command line options that modify the result. These are added to the [ScannerDetails] when
- *   looking up results from the [ScanResultsStorage]. Defaults to [DEFAULT_CONFIGURATION_OPTIONS].
+ *   looking up results from the [ScanResultsStorage]. Defaults to [ScanCodeConfig.DEFAULT_COMMAND_LINE_OPTIONS].
  * * **"commandLineNonConfig":** Command line options that do not modify the result and should therefore not be
- *   considered in [configuration], like "--processes". Defaults to [DEFAULT_NON_CONFIGURATION_OPTIONS].
+ *   considered in [configuration], like "--processes". Defaults to
+ *   [ScanCodeConfig.DEFAULT_COMMAND_LINE_NON_CONFIG_OPTIONS].
+ * * **"preferFileLicense":** A flag to indicate whether the "high-level" per-file license reported by ScanCode
+ *   starting with version 32 should be used instead of the individual "low-level" per-line license findings. The
+ *   per-file license may be different from the conjunction of per-line licenses and is supposed to contain fewer
+ *   false positives. However, no exact line numbers can be associated with the per-file license anymore. If enabled,
+ *   the start line of the per-file license finding is set to the minimum of all start lines for per-line findings in
+ *   that file, the end line is set to the maximum of all end lines for per-line findings in that file, and the score
+ *   is set to the arithmetic average of the scores of all per-line findings in that file. Defaults to `false`.
  */
 class ScanCode internal constructor(
     name: String,
-    config: ScanCodeConfig,
+    private val config: ScanCodeConfig,
     private val wrapperConfig: ScannerWrapperConfig
 ) : CommandLinePathScannerWrapper(name) {
     // This constructor is required by the `RequirementsCommand`.
-    constructor(name: String, wrapperConfig: ScannerWrapperConfig) : this(name, ScanCodeConfig.EMPTY, wrapperConfig)
+    constructor(name: String, wrapperConfig: ScannerWrapperConfig) : this(name, ScanCodeConfig.DEFAULT, wrapperConfig)
 
     companion object {
         const val SCANNER_NAME = "ScanCode"
 
         private const val LICENSE_REFERENCES_OPTION_VERSION = "32.0.0"
-        private const val OUTPUT_FORMAT = "json-pp"
-        private const val TIMEOUT = 300
-
-        /**
-         * Configuration options that are relevant for [configuration] because they change the result file.
-         */
-        private val DEFAULT_CONFIGURATION_OPTIONS = listOf(
-            "--copyright",
-            "--license",
-            "--info",
-            "--strip-root",
-            "--timeout", TIMEOUT.toString()
-        )
-
-        /**
-         * Configuration options that are not relevant for [configuration] because they do not change the result
-         * file.
-         */
-        private val DEFAULT_NON_CONFIGURATION_OPTIONS = listOf(
-            "--processes", max(1, Runtime.getRuntime().availableProcessors() - 1).toString()
-        )
-
-        private val OUTPUT_FORMAT_OPTION = if (OUTPUT_FORMAT.startsWith("json")) {
-            "--$OUTPUT_FORMAT"
-        } else {
-            "--output-$OUTPUT_FORMAT"
-        }
     }
 
     class Factory : ScannerWrapperFactory<ScanCodeConfig>(SCANNER_NAME) {
@@ -107,35 +85,33 @@ class ScanCode internal constructor(
         override fun parseConfig(options: Options, secrets: Options) = ScanCodeConfig.create(options)
     }
 
-    override val matcher by lazy { ScannerMatcher.create(details, wrapperConfig.matcherConfig) }
-
-    override val readFromStorage by lazy { wrapperConfig.readFromStorageWithDefault(matcher) }
-
-    override val writeToStorage by lazy { wrapperConfig.writeToStorageWithDefault(matcher) }
-
-    override val configuration by lazy {
-        buildList {
-            addAll(configurationOptions)
-            add(OUTPUT_FORMAT_OPTION)
-        }.joinToString(" ")
-    }
-
-    private val configurationOptions = config.commandLine?.splitOnWhitespace() ?: DEFAULT_CONFIGURATION_OPTIONS
-    private val nonConfigurationOptions = config.commandLineNonConfig?.splitOnWhitespace()
-        ?: DEFAULT_NON_CONFIGURATION_OPTIONS
+    private val commandLineOptions by lazy { getCommandLineOptions(version) }
 
     internal fun getCommandLineOptions(version: String) =
         buildList {
-            addAll(configurationOptions)
-            addAll(nonConfigurationOptions)
+            addAll(config.commandLine)
+            addAll(config.commandLineNonConfig)
 
             if (Semver(version).isGreaterThanOrEqualTo(LICENSE_REFERENCES_OPTION_VERSION)) {
                 // Required to be able to map ScanCode license keys to SPDX IDs.
                 add("--license-references")
             }
         }
 
-    val commandLineOptions by lazy { getCommandLineOptions(version) }
+    override val configuration by lazy {
+        buildList {
+            addAll(config.commandLine)
+
+            // Add this in the style of a fake command line option for consistency with the above.
+            if (config.preferFileLicense) add("--prefer-file-license")
+        }.joinToString(" ")
+    }
+
+    override val matcher by lazy { ScannerMatcher.create(details, wrapperConfig.matcherConfig) }
+
+    override val readFromStorage by lazy { wrapperConfig.readFromStorageWithDefault(matcher) }
+
+    override val writeToStorage by lazy { wrapperConfig.writeToStorageWithDefault(matcher) }
 
     override fun command(workingDir: File?) =
         listOfNotNull(workingDir, if (Os.isWindows) "scancode.bat" else "scancode").joinToString(File.separator)
@@ -179,7 +155,7 @@ class ScanCode internal constructor(
     }
 
     override fun createSummary(result: String, startTime: Instant, endTime: Instant): ScanSummary =
-        parseResult(result).toScanSummary()
+        parseResult(result).toScanSummary(config.preferFileLicense)
 
     /**
      * Execute ScanCode with the configured arguments to scan the given [path] and produce [resultFile].
@@ -188,8 +164,8 @@ class ScanCode internal constructor(
         ProcessCapture(
             command(),
             *commandLineOptions.toTypedArray(),
-            path.absolutePath,
-            OUTPUT_FORMAT_OPTION,
-            resultFile.absolutePath
+            // The output format option needs to directly precede the result file path.
+            "--json-pp", resultFile.absolutePath,
+            path.absolutePath
         )
 }
@@ -19,19 +19,49 @@
 
 package org.ossreviewtoolkit.plugins.scanners.scancode
 
+import kotlin.math.max
+import kotlin.time.Duration.Companion.minutes
+
 import org.ossreviewtoolkit.utils.common.Options
+import org.ossreviewtoolkit.utils.common.splitOnWhitespace
 
 data class ScanCodeConfig(
-    val commandLine: String?,
-    val commandLineNonConfig: String?
+    val commandLine: List<String>,
+    val commandLineNonConfig: List<String>,
+    val preferFileLicense: Boolean
 ) {
     companion object {
-        val EMPTY = ScanCodeConfig(null, null)
+        /**
+         * The default time after which scanning a file is aborted.
+         */
+        private val DEFAULT_TIMEOUT = 5.minutes
+
+        /**
+         * The default list of command line options that might have an impact on the scan results.
+         */
+        private val DEFAULT_COMMAND_LINE_OPTIONS = listOf(
+            "--copyright",
+            "--license",
+            "--info",
+            "--strip-root",
+            "--timeout", "${DEFAULT_TIMEOUT.inWholeSeconds}"
+        )
+
+        /**
+         * The default list of command line options that cannot have an impact on the scan results.
+         */
+        private val DEFAULT_COMMAND_LINE_NON_CONFIG_OPTIONS = listOf(
+            "--processes", max(1, Runtime.getRuntime().availableProcessors() - 1).toString()
+        )
 
-        private const val COMMAND_LINE_PROPERTY = "commandLine"
-        private const val COMMAND_LINE_NON_CONFIG_PROPERTY = "commandLineNonConfig"
+        val DEFAULT = create(emptyMap())
 
         fun create(options: Options) =
-            ScanCodeConfig(options[COMMAND_LINE_PROPERTY], options[COMMAND_LINE_NON_CONFIG_PROPERTY])
+            ScanCodeConfig(
+                options["commandLine"]?.splitOnWhitespace() ?: DEFAULT_COMMAND_LINE_OPTIONS,
+                options["commandLineNonConfig"]?.splitOnWhitespace()
+                    ?: DEFAULT_COMMAND_LINE_NON_CONFIG_OPTIONS,
+                options["preferFileLicense"].toBoolean()
+            )
     }
 }
@@ -58,7 +58,7 @@ private data class LicenseMatch(
     val score: Float
 )
 
-fun ScanCodeResult.toScanSummary(): ScanSummary {
+fun ScanCodeResult.toScanSummary(preferFileLicense: Boolean = false): ScanSummary {
     val licenseFindings = mutableSetOf<LicenseFinding>()
     val copyrightFindings = mutableSetOf<CopyrightFinding>()
     val issues = mutableListOf<Issue>()
@@ -91,19 +91,31 @@ fun ScanCodeResult.toScanSummary(): ScanSummary {
             it.value.first()
         }
 
-        licenses.mapTo(licenseFindings) { license ->
-            // ScanCode uses its own license keys as identifiers in license expressions.
-            val spdxLicenseExpression = license.licenseExpression.mapLicense(scanCodeKeyToSpdxIdMappings)
-
-            LicenseFinding(
-                license = spdxLicenseExpression,
+        if (preferFileLicense && file is FileEntry.Version3 && file.detectedLicenseExpressionSpdx != null) {
+            licenseFindings += LicenseFinding(
+                license = file.detectedLicenseExpressionSpdx,
                 location = TextLocation(
                     path = file.path,
-                    startLine = license.startLine,
-                    endLine = license.endLine
+                    startLine = licenses.minOf { it.startLine },
+                    endLine = licenses.maxOf { it.endLine }
                 ),
-                score = license.score
+                score = licenses.map { it.score }.average().toFloat()
             )
+        } else {
+            licenses.mapTo(licenseFindings) { license ->
+                // ScanCode uses its own license keys as identifiers in license expressions.
+                val spdxLicenseExpression = license.licenseExpression.mapLicense(scanCodeKeyToSpdxIdMappings)
+
+                LicenseFinding(
+                    license = spdxLicenseExpression,
+                    location = TextLocation(
+                        path = file.path,
+                        startLine = license.startLine,
+                        endLine = license.endLine
+                    ),
+                    score = license.score
+                )
+            }
         }
 
         file.copyrights.mapTo(copyrightFindings) { copyright ->

@@ -23,6 +23,7 @@ import io.kotest.core.spec.style.FreeSpec
 import io.kotest.matchers.Matcher
 import io.kotest.matchers.collections.beEmpty
 import io.kotest.matchers.collections.containExactlyInAnyOrder
+import io.kotest.matchers.collections.shouldContainExactlyInAnyOrder
 import io.kotest.matchers.collections.shouldHaveSingleElement
 import io.kotest.matchers.collections.shouldHaveSize
 import io.kotest.matchers.should
@@ -80,9 +81,25 @@ class ScanCodeResultParserTest : FreeSpec({
 
                 val summary = parseResult(resultFile).toScanSummary()
 
-                summary.licenseFindings.find {
-                    it.location == TextLocation("README.md", 100) && it.score == 100.0f
-                }?.license.toString() shouldBe "GPL-2.0-only WITH GCC-exception-2.0"
+                with(summary.licenseFindings) {
+                    shouldHaveSize(18)
+                    find { it.location == TextLocation("README.md", 100) && it.score == 100.0f }
+                        ?.license.toString() shouldBe "GPL-2.0-only WITH GCC-exception-2.0"
+                }
+            }
+
+            "get file-level findings with the 'preferFileLicense' option" {
+                val resultFile = getAssetFile("scancode-32.0.8_spdx-expression-parse_no-license-references.json")
+
+                val summary = parseResult(resultFile).toScanSummary(preferFileLicense = true)
+
+                summary.licenseFindings.map { it.license.toString() }.shouldContainExactlyInAnyOrder(
+                    "LicenseRef-scancode-generic-cla AND MIT",
+                    "MIT",
+                    "MIT",
+                    "GPL-2.0-only WITH GCC-exception-2.0 AND JSON AND BSD-2-Clause AND CC-BY-3.0 AND MIT",
+                    "GPL-2.0-only WITH GCC-exception-2.0 AND BSD-3-Clause"
+                )
             }
         }