Hi, Vladislav, Thanks! See comments below. On Dec 02, Vladislav Vaintroub wrote:
revision-id: 5b6b23563bf (mariadb-11.7.1-5-g5b6b23563bf) parent(s): eff9c198e32 author: Vladislav Vaintroub committer: Vladislav Vaintroub timestamp: 2024-11-25 19:04:18 +0100 message:
MDEV-34979 generate SBOM from server builds
This commit adds the capability to generate a Software Bill of Materials (SBOM) from server builds.
It introduces a new WITH_SBOM variable, which defaults to ON for package builds (i.e. if BUILD_CONFIG is used) and to OFF otherwise.
When enabled, the build process will produce an sbom.json document in CycloneDX format, capturing information about various dependencies, which is gathered from various sources.
We use git submodule information and CMake external projects properties to gather version information for 3rd party code, but also handle dependencies if external code is part of our repository (zlib, or Connect storage engine's minizip)
The SBOM document is stored in the root build directory in sbom.json file, but is not currently installed.
diff --git a/CMakeLists.txt b/CMakeLists.txt --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -581,6 +581,13 @@ ENDIF()
INCLUDE(build_depends)
+INCLUDE(CMakeDependentOption) +CMAKE_DEPENDENT_OPTION(WITH_SBOM "Generate Software Bill of Materials (SBOM)" OFF "BUILD_CONFIG" ON) +IF(WITH_SBOM) + INCLUDE(generate_sbom) + GENERATE_SBOM() +ENDIF()
This doesn't work for me, I mean, cmake -DBUILD_CONFIG=mysql_release . does not generate the file, but cmake -DBUILD_CONFIG=mysql_release . -DWITH_SBOM=ON does. Maybe it'd be more robust to use OPTION() and set it from cmake/build_configuration/mysql_release.cmake? Like it's done for other options.
+ INCLUDE(CPack)
IF(WIN32 AND SIGNCODE) diff --git a/cmake/generate_submodule_info.cmake b/cmake/generate_submodule_info.cmake --- /dev/null +++ b/cmake/generate_submodule_info.cmake @@ -0,0 +1,73 @@ ... + FOREACH(s ${out_list}) + IF(${counter} EQUAL 0) + IF (NOT("${s}" MATCHES "Entering '"))
does it depend on the locale?
+ MESSAGE(FATAL "Unexpected output ${outvar}") + ENDIF() + STRING(LENGTH "${s}" slen) + MATH(EXPR substr_len "${slen} - 11") ... diff --git a/cmake/generate_sbom.cmake b/cmake/generate_sbom.cmake --- /dev/null +++ b/cmake/generate_sbom.cmake @@ -0,0 +1,240 @@ +INCLUDE(generate_submodule_info) +INCLUDE(ExternalProject) + +# Remove all matches from a cmake list +FUNCTION (REMOVE_ALL_MATCHES list_var_name match_str) + SET(new_list) + FOREACH(el ${${list_var_name}}) + IF(NOT("${el}" MATCHES "${match_str}")) + LIST(APPEND new_list ${el}) + ENDIF() + ENDFOREACH() + SET(${list_var_name} ${new_list} PARENT_SCOPE) +ENDFUNCTION()
you could've used LIST(FILTER ...) instead. (but not LIST(REMOVE_ITEM ...) which is 3.16+)
+ +# Extract user name and repository name from a github URL. +FUNCTION (EXTRACT_REPO_NAME_AND_USER repo_url repo_name_var repo_user_var) + IF(repo_url MATCHES "^git@") + # normalize to https-style URLs + STRING(REGEX REPLACE "^git@([^:]+):(.*)$" "https://\\1/\\2" repo_url "${repo_url}")
I don't think we have that, is it even tested? :) note, I'm not saying it isn't needed, better to have it than not.
+ ENDIF() + # Extract the repository user + STRING(REGEX REPLACE "https://([^/]+)/([^/]+)/.*" "\\2" repo_user "${repo_url}") + + STRING(REGEX REPLACE ".*/([^/]*)$" "\\1" repo_name "${repo_url}") + STRING(REGEX REPLACE "\\.git$" "" repo_name "${repo_name}") + + SET(${repo_name_var} ${repo_name} PARENT_SCOPE) + SET(${repo_user_var} ${repo_user} PARENT_SCOPE) +ENDFUNCTION() + +# Add a known 3rd party dependency for SBOM generation +# Currently used for "vendored" (part of our repository) source code we know about +# such as zlib, as well ExternalProject_Add() projects +MACRO(ADD_THIRD_PARTY_DEPENDENCY name url tag rev version description) + LIST(FIND ALL_THIRD_PARTY ${name} idx) + IF (idx GREATER -1) + MESSAGE(FATAL_ERROR "${name} is already in ALL_THIRD_PARTY") + ENDIF() + SET(${name}_URL ${url}) + SET(${name}_TAG ${tag}) + SET(${name}_REVISION ${rev}) + SET(${name}_DESCRIPTION "${description}") + SET(${name}_VERSION "${version}") + LIST(APPEND ALL_THIRD_PARTY ${name}) +ENDMACRO() + +# Match third party component with supplier +# CyclonDX documentation says it is +# "The organization that supplied the component. +# The supplier may often be the manufacturer, but may also be a distributor or repackager." +# +# Perhaps it can always be "MariaDB", but security team recommendation is different +# more towards "author" +FUNCTION (sbom_get_supplier repo_name repo_user varname)
why a function? just curious
+ IF("${repo_name_SUPPLIER}") + SET(${varname} "${repo_name_SUPPLIER}" PARENT_SCOPE) ... + # It is possible to provide EXTRA_SBOM_DEPENDENCIES + # and accompanying per-dependency data, to extend generared sbom + # document. + # Example below injects an extra "ncurses" dependency using several + # command line parameters for CMake. + # -DEXTRA_SBOM_DEPENDENCIES=ncurses + # -Dncurses_URL=https://github.com/mirror/ncurses + # -Dncurses_TAG=v6.4 + # -Dncurses_VERSION=6.4 + # -Dncurses_DESCRIPTION="A fake extra dependency" + SET(ALL_THIRD_PARTY ${ALL_SUBMODULES} ${EXTRA_SBOM_DEPENDENCIES})
okay. Though I personally wouldn't use ALL_SUBMODULES and EXTRA_SBOM_DEPENDENCIES and instead would scan GET_CMAKE_PROPERTY(VARS VARIABLES) for *_TAG or something
+ + IF(TARGET ha_connect OR TARGET connect) + ADD_THIRD_PARTY_DEPENDENCY(minizip + "https://github.com/zlib-ng/minizip-ng" + "" 252588f "" + "Vendored minizip, inside connect storage engine, storage/connect/zip.c et al")
did you verify that our file matches the content of the file from that repo at that commit? also, a thought - how can we prevent our minigzip.c from being modified without updating the dependency? Suggestion, check the commit hash, like git log -n1 --format=%h storage/connect/zip.c
+ ENDIF() + + IF(TARGET libfmt) + ExternalProject_GET_PROPERTY(libfmt URL) + STRING(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" libmt_TAG "${URL}") + ADD_THIRD_PARTY_DEPENDENCY(libfmt + "https://github.com/fmtlib/fmt" + "${libmt_TAG}" "${libmt_TAG}" "${libmt_TAG}" + "header only library, used in server") + ENDIF()
I wonder, wouldn't it be simpler and more robust to overwrite or wrap ExternalProject() to do the above automatically? (using the same name or a different one like MyExternalProject) ... Also, I noticed that the server entry gets "purl": "pkg:github//@5b6b23563bf" perhaps it should have a repo name here? Regards, Sergei Chief Architect, MariaDB Server and security@mariadb.org