Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: environment - gather declared license information according to PEP639 #755

Merged
merged 36 commits into from
Jun 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions cyclonedx_py/_internal/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from .utils.cdx import licenses_fixup, make_bom
from .utils.packaging import metadata2extrefs, metadata2licenses, normalize_packagename
from .utils.pep610 import PackageSourceArchive, PackageSourceVcs, packagesource2extref, packagesource4dist
from .utils.pep639 import dist2licenses as dist2licenses_pep639
from .utils.pyproject import pyproject2component, pyproject2dependencies, pyproject_load

if TYPE_CHECKING: # pragma: no cover
Expand Down Expand Up @@ -102,6 +103,16 @@ def make_argument_parser(**kwargs: Any) -> 'ArgumentParser':
• Build an SBOM from PDM environment:
$ %(prog)s "$(pdm info --python)"
""")
p.add_argument('--PEP-639',
action='store_true',
dest='pep639',
help='Enable license gathering according to PEP 639 '
'(improving license clarity with better package metadata).\n'
'The behavior may change during the draft development of the PEP.')
p.add_argument('--gather-license-texts',
action='store_true',
dest='gather_license_texts',
help='Enable license text gathering.')
add_argument_pyproject(p)
add_argument_mc_type(p)
# TODO possible additional switch:
Expand All @@ -118,8 +129,12 @@ def make_argument_parser(**kwargs: Any) -> 'ArgumentParser':

def __init__(self, *,
             logger: 'Logger',
             pep639: bool,
             gather_license_texts: bool,
             **__: Any) -> None:
    """
    Keep the logger and the license-gathering switches for later use.

    :param logger: logger stored on the instance.
    :param pep639: whether licenses are additionally gathered according to PEP 639.
    :param gather_license_texts: whether license texts are gathered, too.
    :param __: any further keyword arguments are accepted and ignored.
    """
    self._logger = logger
    self._pep639 = pep639
    self._gather_license_texts = gather_license_texts

def __call__(self, *, # type:ignore[override]
python: Optional[str],
Expand Down Expand Up @@ -167,6 +182,11 @@ def __add_components(self, bom: 'Bom',
external_references=metadata2extrefs(dist_meta),
# path of dist-package on disc? naaa... a package may have multiple files/folders on disc
)
if self._pep639:
component.licenses.update(
dist2licenses_pep639(dist,
self._gather_license_texts,
self._logger))
del dist_meta, dist_name, dist_version
self.__component_add_extref_and_purl(component, packagesource4dist(dist))
all_components[normalize_packagename(component.name)] = (
Expand Down
38 changes: 38 additions & 0 deletions cyclonedx_py/_internal/utils/mimetypes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# This file is part of CycloneDX Python Lib
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) OWASP Foundation. All Rights Reserved.

from mimetypes import guess_type as _stdlib_guess_type
from os.path import splitext
from typing import Optional

# Maps a lower-case file extension (without the leading dot) to its media type.
_ext_mime_map = {
    # https://www.iana.org/assignments/media-types/media-types.xhtml
    'md': 'text/markdown',
    'txt': 'text/plain',
    'rst': 'text/prs.fallenstein.rst',
    # add more mime types. pull-requests welcome!
}


def guess_type(file_name: str) -> Optional[str]:
    """
    Guess the media type of a file from its name.

    Extensions listed in the module's own map win; the lookup is case-insensitive.
    Anything else is delegated to the stdlib `mimetypes.guess_type()`, whose answers
    depend heavily on the type registry of the env/os -- which is why this polyfill exists.

    :param file_name: name or path of the file.
    :return: the media type, or `None` if neither source knows one.
    """
    _, suffix = splitext(file_name)
    known_type = _ext_mime_map.get(suffix[1:].lower())
    if known_type:
        return known_type
    stdlib_type, _ = _stdlib_guess_type(file_name)
    return stdlib_type
6 changes: 2 additions & 4 deletions cyclonedx_py/_internal/utils/packaging.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ def metadata2licenses(metadata: 'PackageMetadata') -> Generator['License', None,
# see spec: https://packaging.python.org/en/latest/specifications/core-metadata/#classifier-multiple-use
classifiers: List[str] = metadata.get_all('Classifier') # type:ignore[assignment]
yield from classifiers2licenses(classifiers, lfac, lack)
for mlicense in metadata.get_all('License', ()):
# see spec: https://packaging.python.org/en/latest/specifications/core-metadata/#license
for mlicense in set(metadata.get_all('License', ())):
# see spec: https://packaging.python.org/en/latest/specifications/core-metadata/#license
if len(mlicense) <= 0:
continue
license = lfac.make_from_string(mlicense,
Expand All @@ -57,8 +57,6 @@ def metadata2licenses(metadata: 'PackageMetadata') -> Generator['License', None,
text=AttachedText(content=mlicense))
else:
yield license
# TODO: iterate over "License-File" declarations and read them
# for mlfile in metadata.get_all('License-File'): ...


def metadata2extrefs(metadata: 'PackageMetadata') -> Generator['ExternalReference', None, None]:
Expand Down
80 changes: 80 additions & 0 deletions cyclonedx_py/_internal/utils/pep639.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# This file is part of CycloneDX Python Lib
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) OWASP Foundation. All Rights Reserved.

"""
Functionality related to PEP 639.

See https://peps.python.org/pep-0639/
"""

from base64 import b64encode
from os.path import join
from typing import TYPE_CHECKING, Generator

from cyclonedx.factory.license import LicenseFactory
from cyclonedx.model import AttachedText, Encoding
from cyclonedx.model.license import DisjunctiveLicense, LicenseAcknowledgement

from .mimetypes import guess_type

if TYPE_CHECKING: # pragma: no cover
from importlib.metadata import Distribution
from logging import Logger

from cyclonedx.model.license import License


def dist2licenses(
    dist: 'Distribution',
    gather_text: bool,
    logger: 'Logger'
) -> Generator['License', None, None]:
    """
    Yield the licenses a distribution declares according to PEP 639.

    A non-empty ``License-Expression`` metadata field is turned into a license by the license factory.
    If ``gather_text`` is set, each distinct ``License-File`` entry is read from the distribution
    and yielded as a named license that carries the file content as attached text.

    :param dist: the distribution to inspect.
    :param gather_text: whether to read the declared license files and attach their content.
    :param logger: receives a debug message for every declared license file that could not be read.
    :return: generator of licenses; all of them are acknowledged as "declared".
    """
    lfac = LicenseFactory()
    lack = LicenseAcknowledgement.DECLARED
    metadata = dist.metadata  # see https://packaging.python.org/en/latest/specifications/core-metadata/
    # Use `.get()` instead of subscription: subscribing a missing key is deprecated
    # in `importlib.metadata` and announced to raise `KeyError` instead of returning `None`.
    # An empty value carries no license information, so it is skipped as well.
    if lexp := metadata.get('License-Expression'):
        # see spec: https://peps.python.org/pep-0639/#add-license-expression-field
        yield lfac.make_from_string(lexp,
                                    license_acknowledgement=lack)
    if gather_text:
        for mlfile in set(metadata.get_all('License-File', ())):
            # see spec: https://peps.python.org/pep-0639/#add-license-file-field
            # latest spec rev: https://discuss.python.org/t/pep-639-round-3-improving-license-clarity-with-better-package-metadata/53020  # noqa: E501

            # per spec > license files are stored in the `.dist-info/licenses/` subdirectory of the produced wheel.
            # but in practice, other locations are used, too.
            content = dist.read_text(join('licenses', mlfile)) \
                or dist.read_text(join('license_files', mlfile)) \
                or dist.read_text(mlfile)
            if content is None:  # pragma: no cover
                logger.debug('Error: failed to read license file %r for dist %r',
                             mlfile, metadata.get('Name'))
                continue
            encoding = None
            content_type = guess_type(mlfile) or AttachedText.DEFAULT_CONTENT_TYPE
            # per default, license files are human-readable texts.
            if not content_type.startswith('text/'):
                # anything that is not a text type gets attached base64-encoded
                encoding = Encoding.BASE_64
                content = b64encode(content.encode('utf-8')).decode('ascii')
            yield DisjunctiveLicense(
                name=f'declared license file: {mlfile}',
                acknowledgement=lack,
                text=AttachedText(
                    content=content,
                    encoding=encoding,
                    content_type=content_type
                ))
5 changes: 5 additions & 0 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ The full documentation can be issued by running with ``environment --help``:

$ cyclonedx-py environment --help
usage: cyclonedx-py environment [-h] [-v]
[--PEP-639] [--gather-license-texts]
[--short-PURLs] [--output-reproducible]
[--validate | --no-validate]
[-o <file>] [--sv <version>] [--of <format>]
Expand All @@ -65,6 +66,10 @@ The full documentation can be issued by running with ``environment --help``:

options:
-h, --help show this help message and exit
--PEP-639 Enable license gathering according to PEP 639 (improving license clarity with better package metadata).
The behavior may change during the draft development of the PEP.
--gather-license-texts
Enable license text gathering.
--pyproject <file> Path to the root component's `pyproject.toml` file.
This should point to a file compliant with PEP 621 (storing project metadata).
--mc-type <type> Type of the main component
Expand Down
64 changes: 64 additions & 0 deletions tests/_data/infiles/environment/with-license-pep639/init.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""
initialize this testbed.
"""

from os import name as os_name
from os.path import dirname, join
from subprocess import PIPE, CompletedProcess, run # nosec:B404
from sys import argv, executable
from typing import Any
from venv import EnvBuilder

__all__ = ['main']

# directory of this script; used as working directory for all pip calls
this_dir = dirname(__file__)
# location of the virtual environment that gets created and populated
env_dir = join(this_dir, '.venv')
# pip constraints file: read on install, (re)written when called with `--pin`
constraint_file = join(this_dir, 'pinning.txt')


def pip_run(*args: str, **kwargs: Any) -> CompletedProcess:
    """
    Run pip of the current interpreter against the testbed's environment.

    :param args: arguments appended to the pip command line.
    :param kwargs: additional keyword arguments passed on to `subprocess.run()`.
    :return: the completed process.
    :raises RuntimeError: if the process exits with a non-zero return code.
    """
    # pip is not API, but a CLI -- call it like that!
    command = (executable, '-m', 'pip', '--python', env_dir) + args
    print('+ ', *command)
    completed = run(command, **kwargs, cwd=this_dir, shell=False)  # nosec:B603
    if completed.returncode == 0:
        return completed
    raise RuntimeError('process failed')


def pip_install(*args: str) -> None:
    """
    Install into the testbed's environment, honoring the constraints file.

    :param args: arguments appended to the `pip install` call, e.g. package names.
    """
    fixed_options = (
        'install', '--require-virtualenv', '--no-input', '--progress-bar=off', '--no-color',
        '-c', constraint_file,  # needed for reproducibility
    )
    pip_run(*fixed_options, *args)


def main() -> None:
    """
    Create the testbed's virtual environment and install the test packages into it.
    """
    env_builder = EnvBuilder(
        system_site_packages=False,
        symlinks=os_name != 'nt',
        with_pip=False,
    )
    env_builder.create(env_dir)

    packages = (
        # with License-Expression
        'attrs',
        # with License-File
        'boolean.py',
        'jsonpointer',
        'license_expression',
        'lxml',
    )
    pip_install(*packages)


if __name__ == '__main__':
    main()
    # on request, write the frozen package list to the constraints file
    pin_requested = '--pin' in argv
    if pin_requested:
        frozen = pip_run('freeze', '--all', '--local', stdout=PIPE)
        with open(constraint_file, 'wb') as pinning:
            pinning.write(frozen.stdout)
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
attrs==23.2.0
boolean.py==4.0
jsonpointer==2.4
license-expression==30.3.0
lxml==5.2.2
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
[project]
# https://packaging.python.org/en/latest/specifications/declaring-project-metadata/#declaring-project-metadata
name = "with-extras"
version = "0.1.0"
description = "dependencies with license declaration according to PEP 639"

dependencies = [
# with License-Expression
"attrs",
# with License-File
"boolean.py",
"jsonpointer",
"license_expression",
"lxml",
]

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading