Skip to content

Commit

Permalink
feat: guarantee unique BomRefs in serialization result (#479)
Browse files Browse the repository at this point in the history
Incorporate `output.BomRefDiscriminator` on serialization

Signed-off-by: Jan Kowalleck <jan.kowalleck@gmail.com>
  • Loading branch information
jkowalleck committed Oct 31, 2023
1 parent f61a730 commit a648775
Show file tree
Hide file tree
Showing 8 changed files with 129 additions and 34 deletions.
25 changes: 10 additions & 15 deletions cyclonedx/model/bom.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import warnings
from datetime import datetime
from itertools import chain
from typing import TYPE_CHECKING, Iterable, Optional, Set, Union
from typing import TYPE_CHECKING, Generator, Iterable, Optional, Union
from uuid import UUID, uuid4

import serializable
Expand Down Expand Up @@ -427,16 +427,11 @@ def external_references(self) -> 'SortedSet[ExternalReference]':
def external_references(self, external_references: Iterable[ExternalReference]) -> None:
self._external_references = SortedSet(external_references)

def _get_all_components(self) -> Set[Component]:
components: Set[Component] = set()
def _get_all_components(self) -> Generator[Component, None, None]:
if self.metadata.component:
components.update(self.metadata.component.get_all_nested_components(include_self=True))

# Add Components and sub Components
yield from self.metadata.component.get_all_nested_components(include_self=True)
for c in self.components:
components.update(c.get_all_nested_components(include_self=True))

return components
yield from c.get_all_nested_components(include_self=True)

def get_vulnerabilities_for_bom_ref(self, bom_ref: BomRef) -> 'SortedSet[Vulnerability]':
"""
Expand Down Expand Up @@ -543,13 +538,13 @@ def validate(self) -> bool:
self.register_dependency(target=_s)

# 1. Make sure dependencies are all in this Bom.
all_bom_refs = set(map(lambda c: c.bom_ref, self._get_all_components())) | set(
component_bom_refs = set(map(lambda c: c.bom_ref, self._get_all_components())) | set(
map(lambda s: s.bom_ref, self.services))
all_dependency_bom_refs = set(chain((d.ref for d in self.dependencies),
chain.from_iterable(
d.dependencies_as_bom_refs() for d in self.dependencies)))

dependency_diff = all_dependency_bom_refs - all_bom_refs
dependency_bom_refs = set(chain(
(d.ref for d in self.dependencies),
chain.from_iterable(d.dependencies_as_bom_refs() for d in self.dependencies)
))
dependency_diff = dependency_bom_refs - component_bom_refs
if len(dependency_diff) > 0:
raise UnknownComponentDependencyException(
f'One or more Components have Dependency references to Components/Services that are not known in this '
Expand Down
43 changes: 42 additions & 1 deletion cyclonedx/output/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,15 @@
import os
import warnings
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any, Literal, Mapping, Optional, Type, Union, overload
from itertools import chain
from random import random
from typing import TYPE_CHECKING, Any, Iterable, Literal, Mapping, Optional, Type, Union, overload

from ..schema import OutputFormat, SchemaVersion

if TYPE_CHECKING: # pragma: no cover
from ..model.bom import Bom
from ..model.bom_ref import BomRef
from .json import Json as JsonOutputter
from .xml import Xml as XmlOutputter

Expand Down Expand Up @@ -144,3 +147,41 @@ def get_instance(bom: 'Bom', output_format: OutputFormat = OutputFormat.XML,
category=DeprecationWarning, stacklevel=1
)
return make_outputter(bom, output_format, schema_version)


class BomRefDiscriminator:
    """Temporarily make the values of a collection of BomRefs pairwise distinct.

    The value each given BomRef has at construction time is remembered.
    :meth:`discriminate` replaces every value that was already seen earlier in the
    collection with a generated one; :meth:`reset` restores the remembered values.

    Instances can be used as a context manager: values are discriminated on enter
    and restored on exit.
    """

    def __init__(self, bomrefs: Iterable['BomRef'], prefix: str = 'BomRef') -> None:
        """
        :param bomrefs: the BomRefs to keep distinct; consumed once, order is preserved.
        :param prefix: leading part of every generated replacement value.
        """
        # do not use dict/set here, different BomRefs with same value have same hash and would shadow each other
        self._bomrefs = tuple((bomref, bomref.value) for bomref in bomrefs)
        self._prefix = prefix

    def __enter__(self) -> None:
        self.discriminate()

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        # returns None, so an exception raised inside the `with` body is not suppressed
        self.reset()

    def discriminate(self) -> None:
        """Assign a generated value to every BomRef whose value was already seen.

        The first BomRef carrying a given value keeps it. A generated value is only
        accepted when it collides neither with a value seen so far nor with any value
        currently present in the collection, so BomRefs that were unique beforehand
        are left untouched.
        """
        # values present right now; generated values must not steal one of them
        reserved = {bomref.value for bomref, _ in self._bomrefs}
        known_values = set()
        for bomref, _ in self._bomrefs:
            value = bomref.value
            if value in known_values:
                value = self._make_unique()
                # the generator is random based and gives no guarantee on its own -> retry on collision
                while value in known_values or value in reserved:
                    value = self._make_unique()
                bomref.value = value
            known_values.add(value)

    def reset(self) -> None:
        """Restore the value every BomRef had when this discriminator was created."""
        for bomref, original_value in self._bomrefs:
            bomref.value = original_value

    def _make_unique(self) -> str:
        """Return a candidate replacement: the prefix followed by two random number fragments.

        The result is not checked for uniqueness here; :meth:`discriminate` does that.
        """
        return f'{self._prefix}{str(random())[1:]}{str(random())[1:]}'  # nosec B311

    @classmethod
    def from_bom(cls, bom: 'Bom', prefix: str = 'BomRef') -> 'BomRefDiscriminator':
        """Create a discriminator over the bom-refs of all components (including nested ones
        and the metadata component), services and vulnerabilities of the given Bom.
        """
        return cls(chain(
            map(lambda c: c.bom_ref, bom._get_all_components()),
            map(lambda s: s.bom_ref, bom.services),
            map(lambda v: v.bom_ref, bom.vulnerabilities)
        ), prefix)
9 changes: 5 additions & 4 deletions cyclonedx/output/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
SchemaVersion1Dot3,
SchemaVersion1Dot4,
)
from . import BaseOutput
from . import BaseOutput, BomRefDiscriminator

if TYPE_CHECKING: # pragma: no cover
from ..model.bom import Bom
Expand Down Expand Up @@ -67,9 +67,10 @@ def generate(self, force_regeneration: bool = False) -> None:
_view = SCHEMA_VERSIONS.get(self.schema_version_enum)
bom = self.get_bom()
bom.validate()
bom_json: Dict[str, Any] = json_loads(
bom.as_json( # type:ignore[attr-defined]
view_=_view))
with BomRefDiscriminator.from_bom(bom):
bom_json: Dict[str, Any] = json_loads(
bom.as_json( # type:ignore[attr-defined]
view_=_view))
bom_json.update(_json_core)
self._bom_json = bom_json
self.generated = True
Expand Down
20 changes: 11 additions & 9 deletions cyclonedx/output/xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
SchemaVersion1Dot3,
SchemaVersion1Dot4,
)
from . import BaseOutput
from . import BaseOutput, BomRefDiscriminator

if TYPE_CHECKING: # pragma: no cover
from ..model.bom import Bom
Expand All @@ -57,14 +57,16 @@ def generate(self, force_regeneration: bool = False) -> None:
bom = self.get_bom()
bom.validate()
xmlns = self.get_target_namespace()
self._bom_xml = '<?xml version="1.0" ?>\n' + xml_dumps(
bom.as_xml( # type:ignore[attr-defined]
_view, as_string=False, xmlns=xmlns),
method='xml', default_namespace=xmlns, encoding='unicode',
# `xml-declaration` is inconsistent/bugged in py38, especially on Windows it will print a non-UTF8 codepage.
# Furthermore, it might add an encoding of "utf-8" which is redundant default value of XML.
# -> so we write the declaration manually, as long as py38 is supported.
xml_declaration=False)
with BomRefDiscriminator.from_bom(bom):
self._bom_xml = '<?xml version="1.0" ?>\n' + xml_dumps(
bom.as_xml( # type:ignore[attr-defined]
_view, as_string=False, xmlns=xmlns),
method='xml', default_namespace=xmlns, encoding='unicode',
# `xml-declaration` is inconsistent/bugged in py38,
# especially on Windows it will print a non-UTF8 codepage.
# Furthermore, it might add an encoding of "utf-8", which is redundant since it is the default for XML.
# -> so we write the declaration manually, as long as py38 is supported.
xml_declaration=False)

self.generated = True

Expand Down
14 changes: 13 additions & 1 deletion tests/_data/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from datetime import datetime, timezone
from decimal import Decimal
from inspect import getmembers, isfunction
from typing import Any, List, Optional
from typing import Any, List, Optional, Tuple
from uuid import UUID

# See https://github.com/package-url/packageurl-python/issues/65
Expand Down Expand Up @@ -754,6 +754,18 @@ def get_bom_with_multiple_licenses() -> Bom:
)


def bom_all_same_bomref() -> Tuple[Bom, int]:
    """Build a Bom in which every element that carries a bom-ref uses the same value.

    :return: the Bom, and the number of bom-refs that were used in it.
    """
    bom = Bom()

    # metadata component with one nested component
    root_sub = Component(name='root.sub', bom_ref='foo')
    root = Component(name='root', bom_ref='foo', components=[root_sub])
    bom.metadata.component = root

    # regular component with one nested component
    comp_sub = Component(name='comp.sub', bom_ref='foo')
    comp = Component(name='comp', bom_ref='foo', components=[comp_sub])
    bom.components.add(comp)

    service = Service(name='serv', bom_ref='foo')
    bom.services.add(service)

    vulnerability = Vulnerability(id='vuln', bom_ref='foo')
    bom.vulnerabilities.add(vulnerability)

    # root, root.sub, comp, comp.sub, serv, vuln
    return bom, 6


# ---


Expand Down
30 changes: 29 additions & 1 deletion tests/test_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
from ddt import data, ddt, named_data, unpack

from cyclonedx.model.bom import Bom
from cyclonedx.output import make_outputter
from cyclonedx.model.bom_ref import BomRef
from cyclonedx.output import BomRefDiscriminator, make_outputter
from cyclonedx.schema import OutputFormat, SchemaVersion


Expand All @@ -49,3 +50,30 @@ def test_fails_on_wrong_args(self, of: OutputFormat, sv: SchemaVersion, raises_r
bom = Mock(spec=Bom)
with self.assertRaisesRegex(*raises_regex):
make_outputter(bom, of, sv)


class TestBomRefDiscriminator(TestCase):
    """Tests for discriminating and restoring BomRef values via `BomRefDiscriminator`."""

    def test_discriminate_and_reset_with(self) -> None:
        """Used as a context manager: values differ inside the block and are restored on exit."""
        bomref1 = BomRef('djdlkfjdslkf')
        bomref2 = BomRef('djdlkfjdslkf')
        self.assertEqual(bomref1.value, bomref2.value, 'blank')
        discr = BomRefDiscriminator([bomref1, bomref2])
        self.assertEqual(bomref1.value, bomref2.value, 'init')
        with discr:
            self.assertNotEqual(bomref1.value, bomref2.value, 'should be discriminated')
        # no explicit `reset()` here: leaving the `with` block alone must restore the values
        self.assertEqual('djdlkfjdslkf', bomref1.value)
        self.assertEqual('djdlkfjdslkf', bomref2.value)

    def test_discriminate_and_reset_manually(self) -> None:
        """Explicit `discriminate()` / `reset()` calls: values differ in between and are restored after."""
        bomref1 = BomRef('djdlkfjdslkf')
        bomref2 = BomRef('djdlkfjdslkf')
        self.assertEqual(bomref1.value, bomref2.value, 'blank')
        discr = BomRefDiscriminator([bomref1, bomref2])
        self.assertEqual(bomref1.value, bomref2.value, 'init')
        discr.discriminate()
        self.assertNotEqual(bomref1.value, bomref2.value, 'should be discriminated')
        discr.reset()
        self.assertEqual('djdlkfjdslkf', bomref1.value)
        self.assertEqual('djdlkfjdslkf', bomref2.value)
11 changes: 10 additions & 1 deletion tests/test_output_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) OWASP Foundation. All Rights Reserved.


import re
from typing import Any, Callable
from unittest import TestCase
from unittest.mock import Mock, patch
Expand All @@ -29,7 +31,7 @@
from cyclonedx.schema import OutputFormat, SchemaVersion
from cyclonedx.validation.json import JsonStrictValidator
from tests import SnapshotMixin, mksname, uuid_generator
from tests._data.models import all_get_bom_funct_invalid, all_get_bom_funct_valid
from tests._data.models import all_get_bom_funct_invalid, all_get_bom_funct_valid, bom_all_same_bomref

UNSUPPORTED_SV = frozenset((SchemaVersion.V1_1, SchemaVersion.V1_0,))

Expand Down Expand Up @@ -80,6 +82,13 @@ def test_invalid(self, get_bom: Callable[[], Bom], sv: SchemaVersion) -> None:
return None # expected
raise error.exception

def test_bomref_not_duplicate(self) -> None:
    """Serialize a Bom whose bom-refs all share one value; the JSON output must carry unique ones."""
    bom, expected_count = bom_all_same_bomref()
    outputter = BY_SCHEMA_VERSION[SchemaVersion.V1_4](bom)
    serialized = outputter.output_as_string()
    bomref_values = re.findall(r'"bom-ref":\s*"(.*?)"', serialized)
    # every bom-ref must show up in the output ...
    self.assertEqual(expected_count, len(bomref_values))
    # ... and none of them may occur more than once
    self.assertCountEqual(set(bomref_values), bomref_values, 'expected unique items')


@ddt
class TestFunctionalBySchemaVersion(TestCase):
Expand Down
11 changes: 9 additions & 2 deletions tests/test_output_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) OWASP Foundation. All Rights Reserved.


import re
from typing import Any, Callable
from unittest import TestCase
from unittest.mock import Mock, patch
Expand All @@ -29,7 +29,7 @@
from cyclonedx.schema import OutputFormat, SchemaVersion
from cyclonedx.validation.xml import XmlValidator
from tests import SnapshotMixin, mksname, uuid_generator
from tests._data.models import all_get_bom_funct_invalid, all_get_bom_funct_valid
from tests._data.models import all_get_bom_funct_invalid, all_get_bom_funct_valid, bom_all_same_bomref


@ddt
Expand Down Expand Up @@ -68,6 +68,13 @@ def test_invalid(self, get_bom: Callable[[], Bom], sv: SchemaVersion) -> None:
return None # expected
raise error.exception

def test_bomref_not_duplicate(self) -> None:
    """Serialize a Bom whose bom-refs all share one value; the XML output must carry unique ones."""
    bom, expected_count = bom_all_same_bomref()
    outputter = BY_SCHEMA_VERSION[SchemaVersion.V1_4](bom)
    serialized = outputter.output_as_string()
    bomref_values = re.findall(r'bom-ref="(.*?)"', serialized)
    # every bom-ref must show up in the output ...
    self.assertEqual(expected_count, len(bomref_values))
    # ... and none of them may occur more than once
    self.assertCountEqual(set(bomref_values), bomref_values, 'expected unique items')


@ddt
class TestFunctionalBySchemaVersion(TestCase):
Expand Down

0 comments on commit a648775

Please sign in to comment.