From 50d90d9f16605eb6d7c0df66237fbe5f8e905f01 Mon Sep 17 00:00:00 2001 From: Pavel Moravec Date: Mon, 12 Feb 2024 08:30:14 +0100 Subject: [PATCH 1/2] [cleaner] Add option to skip cleaning files A new option --skip-cleaning-files / --skip-masking-files allows cleaner to skip cleaning files where the user is certain no sensitive information is present. The option supports globs / wildcards. Relevant: #3469 Closes: #3520 Signed-off-by: Pavel Moravec --- man/en/sos-clean.1 | 10 ++++++++ sos/cleaner/__init__.py | 33 +++++++++++++++++--------- sos/cleaner/archives/__init__.py | 1 - sos/cleaner/parsers/__init__.py | 14 ++++++----- sos/cleaner/parsers/hostname_parser.py | 4 ++-- sos/cleaner/parsers/ip_parser.py | 6 ++--- sos/cleaner/parsers/ipv6_parser.py | 6 ++--- sos/cleaner/parsers/keyword_parser.py | 4 ++-- sos/cleaner/parsers/mac_parser.py | 6 ++--- sos/cleaner/parsers/username_parser.py | 4 ++-- sos/collector/__init__.py | 6 +++++ sos/report/__init__.py | 6 +++++ 12 files changed, 67 insertions(+), 33 deletions(-) diff --git a/man/en/sos-clean.1 b/man/en/sos-clean.1 index c51f327629..fe3a1d8d76 100644 --- a/man/en/sos-clean.1 +++ b/man/en/sos-clean.1 @@ -5,6 +5,7 @@ sos clean - Obfuscate sensitive data from one or more sosreports .B sos clean TARGET [options] [\-\-domains] [\-\-disable-parsers] + [\-\-skip-cleaning-files|\-\-skip-masking-files] [\-\-keywords] [\-\-keyword-file] [\-\-map-file] @@ -63,6 +64,15 @@ trust in the party/parties that may handle the generated report. Valid values for this option are currently: \fBhostname\fR, \fBip\fR, \fBipv6\fR, \fBmac\fR, \fBkeyword\fR, and \fBusername\fR. .TP +.B \-\-skip-cleaning-files, \-\-skip-masking-files FILES +Provide a comma-delimited list of files inside an archive, that cleaner should skip in cleaning. + +Globs like asterisk are supported, so \fBsos_commands/host/hostname*\fR will match all three +usual filenames in that directory (\fBhostname\fR, \fBhostnamectl_status\fR and \fBhostname_-f\fR). 
+ +Use this option with caution, only when you are certain that the given files do not contain any sensitive +information. +.TP .B \-\-keywords KEYWORDS Provide a comma-delimited list of keywords to scrub in addition to the default parsers. diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py index e4ab0a15db..c4fd53afb0 100644 --- a/sos/cleaner/__init__.py +++ b/sos/cleaner/__init__.py @@ -15,6 +15,7 @@ import shutil import sos.cleaner.preppers import tempfile +import fnmatch from concurrent.futures import ThreadPoolExecutor from datetime import datetime @@ -81,6 +82,7 @@ class SoSCleaner(SoSComponent): 'archive_type': 'auto', 'domains': [], 'disable_parsers': [], + 'skip_clean_files': [], 'jobs': 4, 'keywords': [], 'keyword_file': None, @@ -116,7 +118,7 @@ def __init__(self, parser=None, args=None, cmdline=None, in_place=False, # when obfuscating a SoSCollector run during archive extraction os.makedirs(os.path.join(self.tmpdir, 'cleaner'), exist_ok=True) - self.validate_parser_values() + self.review_parser_values() self.cleaner_mapping = self.load_map_file() os.umask(0o77) @@ -125,13 +127,14 @@ def __init__(self, parser=None, args=None, cmdline=None, in_place=False, self.cleaner_md = self.manifest.components.add_section('cleaner') + skip_clean_files = self.opts.skip_clean_files self.parsers = [ - SoSHostnameParser(self.cleaner_mapping), - SoSIPParser(self.cleaner_mapping), - SoSIPv6Parser(self.cleaner_mapping), - SoSMacParser(self.cleaner_mapping), - SoSKeywordParser(self.cleaner_mapping), - SoSUsernameParser(self.cleaner_mapping) + SoSHostnameParser(self.cleaner_mapping, skip_clean_files), + SoSIPParser(self.cleaner_mapping, skip_clean_files), + SoSIPv6Parser(self.cleaner_mapping, skip_clean_files), + SoSMacParser(self.cleaner_mapping, skip_clean_files), + SoSKeywordParser(self.cleaner_mapping, skip_clean_files), + SoSUsernameParser(self.cleaner_mapping, skip_clean_files) ] for _parser in self.opts.disable_parsers: @@ -262,6 +265,11 @@ def 
add_parser_options(cls, parser): default=[], dest='disable_parsers', help=('Disable specific parsers, so that those ' 'elements are not obfuscated')) + clean_grp.add_argument('--skip-cleaning-files', '--skip-masking-files', + action='extend', default=[], + dest='skip_clean_files', + help=('List of files to skip/ignore during ' + 'cleaning. Globs are supported.')) clean_grp.add_argument('-j', '--jobs', default=4, type=int, help='Number of concurrent archives to clean') clean_grp.add_argument('--keywords', action='extend', default=[], @@ -323,10 +331,11 @@ def inspect_target_archive(self): if self.nested_archive: self.nested_archive.ui_name = self.nested_archive.description - def validate_parser_values(self): - """Check any values passed to the parsers via the commandline, e.g. - the --domains option, to ensure that they are valid for the parser in - question. + def review_parser_values(self): + """Check any values passed to the parsers via the commandline: + - For the --domains option, ensure that they are valid for the parser + in question. + - Convert --skip-cleaning-files from globs to regular expressions. 
""" for _dom in self.opts.domains: if len(_dom.split('.')) < 2: @@ -334,6 +343,8 @@ def validate_parser_values(self): f"Invalid value '{_dom}' given: --domains values must be " "actual domains" ) + self.opts.skip_clean_files = [fnmatch.translate(p) for p in + self.opts.skip_clean_files] def execute(self): """SoSCleaner will begin by inspecting the TARGET option to determine diff --git a/sos/cleaner/archives/__init__.py b/sos/cleaner/archives/__init__.py index a729862d57..c6a4c5c16e 100644 --- a/sos/cleaner/archives/__init__.py +++ b/sos/cleaner/archives/__init__.py @@ -50,7 +50,6 @@ class SoSObfuscationArchive(): type_name = 'undetermined' description = 'undetermined' is_nested = False - skip_files = [] prep_files = {} def __init__(self, archive_path, tmpdir): diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py index a1057df9fb..14a3ef7fde 100644 --- a/sos/cleaner/parsers/__init__.py +++ b/sos/cleaner/parsers/__init__.py @@ -42,22 +42,24 @@ class SoSCleanerParser(): name = 'Undefined Parser' regex_patterns = [] skip_line_patterns = [] - skip_files = [] + parser_skip_files = [] # list of skip files relevant to a parser + skip_clean_files = [] # list of global skip files from cmdline arguments map_file_key = 'unset' compile_regexes = True - def __init__(self, config={}): + def __init__(self, config={}, skip_clean_files=[]): if self.map_file_key in config: self.mapping.conf_update(config[self.map_file_key]) + self.skip_clean_files = skip_clean_files self._generate_skip_regexes() def _generate_skip_regexes(self): - """Generate the regexes for the parser's configured `skip_files`, - so that we don't regenerate them on every file being examined for if - the parser should skip a given file. + """Generate the regexes for the parser's configured parser_skip_files + or global skip_clean_files, so that we don't regenerate them on every + file being examined for if the parser should skip a given file. 
""" self.skip_patterns = [] - for p in self.skip_files: + for p in self.parser_skip_files + self.skip_clean_files: self.skip_patterns.append(re.compile(p)) def generate_item_regexes(self): diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py index a739629844..642aa05d29 100644 --- a/sos/cleaner/parsers/hostname_parser.py +++ b/sos/cleaner/parsers/hostname_parser.py @@ -21,9 +21,9 @@ class SoSHostnameParser(SoSCleanerParser): r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}(\b|_)))' ] - def __init__(self, config): + def __init__(self, config, skip_clean_files=[]): self.mapping = SoSHostnameMap() - super(SoSHostnameParser, self).__init__(config) + super(SoSHostnameParser, self).__init__(config, skip_clean_files) def parse_line(self, line): """This will be called for every line in every file we process, so that diff --git a/sos/cleaner/parsers/ip_parser.py b/sos/cleaner/parsers/ip_parser.py index d5522ac237..f6d464a513 100644 --- a/sos/cleaner/parsers/ip_parser.py +++ b/sos/cleaner/parsers/ip_parser.py @@ -25,7 +25,7 @@ class SoSIPParser(SoSCleanerParser): r'.*dnf\[.*\]:' ] - skip_files = [ + parser_skip_files = [ # skip these as version numbers will frequently look like IP addresses # when using regex matching 'installed-debs', @@ -44,6 +44,6 @@ class SoSIPParser(SoSCleanerParser): map_file_key = 'ip_map' compile_regexes = False - def __init__(self, config): + def __init__(self, config, skip_clean_files=[]): self.mapping = SoSIPMap() - super(SoSIPParser, self).__init__(config) + super(SoSIPParser, self).__init__(config, skip_clean_files) diff --git a/sos/cleaner/parsers/ipv6_parser.py b/sos/cleaner/parsers/ipv6_parser.py index b209c646d1..dfd7282a1b 100644 --- a/sos/cleaner/parsers/ipv6_parser.py +++ b/sos/cleaner/parsers/ipv6_parser.py @@ -29,15 +29,15 @@ class SoSIPv6Parser(SoSCleanerParser): r"(([0-9a-f]{1,4}(:[0-9a-f]{0,4}){0,5}))([^.])::(([0-9a-f]{1,4}" r"(:[0-9a-f]{1,4}){0,5})?))(/\d{1,3})?(?![:\\a-z0-9])" ] - skip_files = 
[ + parser_skip_files = [ 'etc/dnsmasq.conf.*', '.*modinfo.*', ] compile_regexes = False - def __init__(self, config): + def __init__(self, config, skip_clean_files=[]): self.mapping = SoSIPv6Map() - super(SoSIPv6Parser, self).__init__(config) + super(SoSIPv6Parser, self).__init__(config, skip_clean_files) def get_map_contents(self): """Structure the dataset contents properly so that they can be reloaded diff --git a/sos/cleaner/parsers/keyword_parser.py b/sos/cleaner/parsers/keyword_parser.py index f611ccd2b1..3c6c442b8b 100644 --- a/sos/cleaner/parsers/keyword_parser.py +++ b/sos/cleaner/parsers/keyword_parser.py @@ -20,9 +20,9 @@ class SoSKeywordParser(SoSCleanerParser): name = 'Keyword Parser' map_file_key = 'keyword_map' - def __init__(self, config): + def __init__(self, config, skip_clean_files=[]): self.mapping = SoSKeywordMap() - super(SoSKeywordParser, self).__init__(config) + super(SoSKeywordParser, self).__init__(config, skip_clean_files) def _parse_line(self, line): return line, 0 diff --git a/sos/cleaner/parsers/mac_parser.py b/sos/cleaner/parsers/mac_parser.py index 4e790018e9..74f95a6aaa 100644 --- a/sos/cleaner/parsers/mac_parser.py +++ b/sos/cleaner/parsers/mac_parser.py @@ -43,15 +43,15 @@ class SoSMacParser(SoSCleanerParser): '53:4f:53', '534f:53' ) - skip_files = [ + parser_skip_files = [ 'sos_commands/.*/modinfo.*' ] map_file_key = 'mac_map' compile_regexes = False - def __init__(self, config): + def __init__(self, config, skip_clean_files=[]): self.mapping = SoSMacMap() - super(SoSMacParser, self).__init__(config) + super(SoSMacParser, self).__init__(config, skip_clean_files) def reduce_mac_match(self, match): """Strips away leading and trailing non-alphanum characters from any diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py index 5909f52d39..c999ff55ef 100644 --- a/sos/cleaner/parsers/username_parser.py +++ b/sos/cleaner/parsers/username_parser.py @@ -26,9 +26,9 @@ class 
SoSUsernameParser(SoSCleanerParser): map_file_key = 'username_map' regex_patterns = [] - def __init__(self, config): + def __init__(self, config, skip_clean_files=[]): self.mapping = SoSUsernameMap() - super(SoSUsernameParser, self).__init__(config) + super(SoSUsernameParser, self).__init__(config, skip_clean_files) def _parse_line(self, line): return line, 0 diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py index ff0c1ab7c2..ee4613f125 100644 --- a/sos/collector/__init__.py +++ b/sos/collector/__init__.py @@ -87,6 +87,7 @@ class SoSCollector(SoSComponent): 'group': None, 'image': '', 'force_pull_image': True, + 'skip_clean_files': [], 'jobs': 4, 'journal_size': 0, 'keywords': [], @@ -483,6 +484,11 @@ def add_parser_options(cls, parser): default=[], dest='disable_parsers', help=('Disable specific parsers, so that ' 'those elements are not obfuscated')) + cleaner_grp.add_argument('--skip-cleaning-files', + '--skip-masking-files', action='extend', + default=[], dest='skip_clean_files', + help=('List of files to skip/ignore during ' + 'cleaning. Globs are supported.')) cleaner_grp.add_argument('--keywords', action='extend', default=[], dest='keywords', help='List of keywords to obfuscate') diff --git a/sos/report/__init__.py b/sos/report/__init__.py index 77087ed19c..7dce29a662 100644 --- a/sos/report/__init__.py +++ b/sos/report/__init__.py @@ -88,6 +88,7 @@ class SoSReport(SoSComponent): 'desc': '', 'domains': [], 'disable_parsers': [], + 'skip_clean_files': [], 'dry_run': False, 'estimate_only': False, 'experimental': False, @@ -358,6 +359,11 @@ def add_parser_options(cls, parser): default=[], dest='disable_parsers', help=('Disable specific parsers, so that ' 'those elements are not obfuscated')) + cleaner_grp.add_argument('--skip-cleaning-files', + '--skip-masking-files', action='extend', + default=[], dest='skip_clean_files', + help=('List of files to skip/ignore during ' + 'cleaning. 
Globs are supported.')) cleaner_grp.add_argument('--keywords', action='extend', default=[], dest='keywords', help='List of keywords to obfuscate') From 2b207e5949b74414b32beffdfeb4c0bcacb5eb91 Mon Sep 17 00:00:00 2001 From: Pavel Moravec Date: Mon, 11 Mar 2024 15:50:11 +0100 Subject: [PATCH 2/2] [tests] Add tests for cleaner's --skip-cleaning-files Closes: #3469 Signed-off-by: Pavel Moravec --- .../basic_function_tests/report_with_mask.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tests/cleaner_tests/basic_function_tests/report_with_mask.py b/tests/cleaner_tests/basic_function_tests/report_with_mask.py index baee836a21..06bb212246 100644 --- a/tests/cleaner_tests/basic_function_tests/report_with_mask.py +++ b/tests/cleaner_tests/basic_function_tests/report_with_mask.py @@ -72,13 +72,15 @@ def test_perms_unchanged_on_modified_file(self): self.assertEqual(imode_orig, imode_obfuscated) -class ReportWithCleanedKeywords(StageOneReportTest): - """Testing for obfuscated keywords provided by the user +class ReportWithUserCustomisations(StageOneReportTest): + """Testing for 1) obfuscated keywords provided by the user (--keywords option), + and 2) skipping the cleaning of specific files (--skip-cleaning-files option) :avocado: tags=stageone """ - sos_cmd = '--clean -o filesys,kernel --keywords=fstab,Linux,tmp --no-update' + sos_cmd = '--clean -o filesys,kernel --keywords=fstab,Linux,tmp,BOOT_IMAGE,fs.dentry-state \ + --skip-cleaning-files proc/cmdline,sos_commands/*/sysctl* --no-update' # Will the 'tmp' be properly treated in path to working dir without raising an error? 
# To make this test effective, we assume the test runs on a system / with Policy @@ -96,6 +98,12 @@ def test_filename_obfuscated(self): def test_keyword_obfuscated_in_file(self): self.assertFileNotHasContent('sos_commands/kernel/uname_-a', 'Linux') + def test_skip_cleaning_single_file(self): + self.assertFileHasContent('proc/cmdline', 'BOOT_IMAGE') + + def test_skip_cleaning_glob_file(self): + self.assertFileHasContent('sos_commands/kernel/sysctl_-a', 'fs.dentry-state') + class DefaultRemoveBinaryFilesTest(StageTwoReportTest): """Testing that binary files are removed by default