From 1ccdcf8aa3eacea2013d98c328a3a9c1558c1056 Mon Sep 17 00:00:00 2001 From: Pavel Moravec Date: Mon, 12 Feb 2024 08:30:14 +0100 Subject: [PATCH] [cleaner] Add option to skip cleaning files A new option --skip-clean-files allows cleaner to skip cleaning files where the user is certain no sensitive information is present. The option supports globs / wildcards. Relevant: #3469 Closes: #3520 Signed-off-by: Pavel Moravec --- sos/cleaner/__init__.py | 32 +++++++++++++++++--------- sos/cleaner/archives/__init__.py | 1 - sos/cleaner/parsers/__init__.py | 14 ++++++----- sos/cleaner/parsers/hostname_parser.py | 4 ++-- sos/cleaner/parsers/ip_parser.py | 6 ++--- sos/cleaner/parsers/ipv6_parser.py | 6 ++--- sos/cleaner/parsers/keyword_parser.py | 4 ++-- sos/cleaner/parsers/mac_parser.py | 6 ++--- sos/cleaner/parsers/username_parser.py | 4 ++-- sos/collector/__init__.py | 5 ++++ sos/report/__init__.py | 5 ++++ 11 files changed, 54 insertions(+), 33 deletions(-) diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py index e4ab0a15db..363eeb3960 100644 --- a/sos/cleaner/__init__.py +++ b/sos/cleaner/__init__.py @@ -15,6 +15,7 @@ import shutil import sos.cleaner.preppers import tempfile +import fnmatch from concurrent.futures import ThreadPoolExecutor from datetime import datetime @@ -81,6 +82,7 @@ class SoSCleaner(SoSComponent): 'archive_type': 'auto', 'domains': [], 'disable_parsers': [], + 'skip_clean_files': [], 'jobs': 4, 'keywords': [], 'keyword_file': None, @@ -116,7 +118,7 @@ def __init__(self, parser=None, args=None, cmdline=None, in_place=False, # when obfuscating a SoSCollector run during archive extraction os.makedirs(os.path.join(self.tmpdir, 'cleaner'), exist_ok=True) - self.validate_parser_values() + self.review_parser_values() self.cleaner_mapping = self.load_map_file() os.umask(0o77) @@ -125,13 +127,14 @@ def __init__(self, parser=None, args=None, cmdline=None, in_place=False, self.cleaner_md = self.manifest.components.add_section('cleaner') + 
skip_clean_files = self.opts.skip_clean_files self.parsers = [ - SoSHostnameParser(self.cleaner_mapping), - SoSIPParser(self.cleaner_mapping), - SoSIPv6Parser(self.cleaner_mapping), - SoSMacParser(self.cleaner_mapping), - SoSKeywordParser(self.cleaner_mapping), - SoSUsernameParser(self.cleaner_mapping) + SoSHostnameParser(self.cleaner_mapping, skip_clean_files), + SoSIPParser(self.cleaner_mapping, skip_clean_files), + SoSIPv6Parser(self.cleaner_mapping, skip_clean_files), + SoSMacParser(self.cleaner_mapping, skip_clean_files), + SoSKeywordParser(self.cleaner_mapping, skip_clean_files), + SoSUsernameParser(self.cleaner_mapping, skip_clean_files) ] for _parser in self.opts.disable_parsers: @@ -262,6 +265,10 @@ def add_parser_options(cls, parser): default=[], dest='disable_parsers', help=('Disable specific parsers, so that those ' 'elements are not obfuscated')) + clean_grp.add_argument('--skip-clean-files', action='extend', + default=[], dest='skip_clean_files', + help=('List of files to skip/ignore during ' + 'cleaning. Asterisks are supported.')) clean_grp.add_argument('-j', '--jobs', default=4, type=int, help='Number of concurrent archives to clean') clean_grp.add_argument('--keywords', action='extend', default=[], @@ -323,10 +330,11 @@ def inspect_target_archive(self): if self.nested_archive: self.nested_archive.ui_name = self.nested_archive.description - def validate_parser_values(self): - """Check any values passed to the parsers via the commandline, e.g. - the --domains option, to ensure that they are valid for the parser in - question. + def review_parser_values(self): + """Check any values passed to the parsers via the commandline: + - For the --domains option, ensure that they are valid for the parser + in question. + - Convert --skip-clean-files from globs to regular expressions. 
""" for _dom in self.opts.domains: if len(_dom.split('.')) < 2: @@ -334,6 +342,8 @@ def validate_parser_values(self): f"Invalid value '{_dom}' given: --domains values must be " "actual domains" ) + self.opts.skip_clean_files = [fnmatch.translate(p) for p in + self.opts.skip_clean_files] def execute(self): """SoSCleaner will begin by inspecting the TARGET option to determine diff --git a/sos/cleaner/archives/__init__.py b/sos/cleaner/archives/__init__.py index a729862d57..c6a4c5c16e 100644 --- a/sos/cleaner/archives/__init__.py +++ b/sos/cleaner/archives/__init__.py @@ -50,7 +50,6 @@ class SoSObfuscationArchive(): type_name = 'undetermined' description = 'undetermined' is_nested = False - skip_files = [] prep_files = {} def __init__(self, archive_path, tmpdir): diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py index a1057df9fb..14a3ef7fde 100644 --- a/sos/cleaner/parsers/__init__.py +++ b/sos/cleaner/parsers/__init__.py @@ -42,22 +42,24 @@ class SoSCleanerParser(): name = 'Undefined Parser' regex_patterns = [] skip_line_patterns = [] - skip_files = [] + parser_skip_files = [] # list of skip files relevant to a parser + skip_clean_files = [] # list of global skip files from cmdline arguments map_file_key = 'unset' compile_regexes = True - def __init__(self, config={}): + def __init__(self, config={}, skip_clean_files=[]): if self.map_file_key in config: self.mapping.conf_update(config[self.map_file_key]) + self.skip_clean_files = skip_clean_files self._generate_skip_regexes() def _generate_skip_regexes(self): - """Generate the regexes for the parser's configured `skip_files`, - so that we don't regenerate them on every file being examined for if - the parser should skip a given file. + """Generate the regexes for the parser's configured parser_skip_files + or global skip_clean_files, so that we don't regenerate them on every + file being examined for if the parser should skip a given file. 
""" self.skip_patterns = [] - for p in self.skip_files: + for p in self.parser_skip_files + self.skip_clean_files: self.skip_patterns.append(re.compile(p)) def generate_item_regexes(self): diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py index a739629844..642aa05d29 100644 --- a/sos/cleaner/parsers/hostname_parser.py +++ b/sos/cleaner/parsers/hostname_parser.py @@ -21,9 +21,9 @@ class SoSHostnameParser(SoSCleanerParser): r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}(\b|_)))' ] - def __init__(self, config): + def __init__(self, config, skip_clean_files=[]): self.mapping = SoSHostnameMap() - super(SoSHostnameParser, self).__init__(config) + super(SoSHostnameParser, self).__init__(config, skip_clean_files) def parse_line(self, line): """This will be called for every line in every file we process, so that diff --git a/sos/cleaner/parsers/ip_parser.py b/sos/cleaner/parsers/ip_parser.py index d5522ac237..f6d464a513 100644 --- a/sos/cleaner/parsers/ip_parser.py +++ b/sos/cleaner/parsers/ip_parser.py @@ -25,7 +25,7 @@ class SoSIPParser(SoSCleanerParser): r'.*dnf\[.*\]:' ] - skip_files = [ + parser_skip_files = [ # skip these as version numbers will frequently look like IP addresses # when using regex matching 'installed-debs', @@ -44,6 +44,6 @@ class SoSIPParser(SoSCleanerParser): map_file_key = 'ip_map' compile_regexes = False - def __init__(self, config): + def __init__(self, config, skip_clean_files=[]): self.mapping = SoSIPMap() - super(SoSIPParser, self).__init__(config) + super(SoSIPParser, self).__init__(config, skip_clean_files) diff --git a/sos/cleaner/parsers/ipv6_parser.py b/sos/cleaner/parsers/ipv6_parser.py index b209c646d1..dfd7282a1b 100644 --- a/sos/cleaner/parsers/ipv6_parser.py +++ b/sos/cleaner/parsers/ipv6_parser.py @@ -29,15 +29,15 @@ class SoSIPv6Parser(SoSCleanerParser): r"(([0-9a-f]{1,4}(:[0-9a-f]{0,4}){0,5}))([^.])::(([0-9a-f]{1,4}" r"(:[0-9a-f]{1,4}){0,5})?))(/\d{1,3})?(?![:\\a-z0-9])" ] - skip_files = 
[ + parser_skip_files = [ 'etc/dnsmasq.conf.*', '.*modinfo.*', ] compile_regexes = False - def __init__(self, config): + def __init__(self, config, skip_clean_files=[]): self.mapping = SoSIPv6Map() - super(SoSIPv6Parser, self).__init__(config) + super(SoSIPv6Parser, self).__init__(config, skip_clean_files) def get_map_contents(self): """Structure the dataset contents properly so that they can be reloaded diff --git a/sos/cleaner/parsers/keyword_parser.py b/sos/cleaner/parsers/keyword_parser.py index f611ccd2b1..3c6c442b8b 100644 --- a/sos/cleaner/parsers/keyword_parser.py +++ b/sos/cleaner/parsers/keyword_parser.py @@ -20,9 +20,9 @@ class SoSKeywordParser(SoSCleanerParser): name = 'Keyword Parser' map_file_key = 'keyword_map' - def __init__(self, config): + def __init__(self, config, skip_clean_files=[]): self.mapping = SoSKeywordMap() - super(SoSKeywordParser, self).__init__(config) + super(SoSKeywordParser, self).__init__(config, skip_clean_files) def _parse_line(self, line): return line, 0 diff --git a/sos/cleaner/parsers/mac_parser.py b/sos/cleaner/parsers/mac_parser.py index 4e790018e9..74f95a6aaa 100644 --- a/sos/cleaner/parsers/mac_parser.py +++ b/sos/cleaner/parsers/mac_parser.py @@ -43,15 +43,15 @@ class SoSMacParser(SoSCleanerParser): '53:4f:53', '534f:53' ) - skip_files = [ + parser_skip_files = [ 'sos_commands/.*/modinfo.*' ] map_file_key = 'mac_map' compile_regexes = False - def __init__(self, config): + def __init__(self, config, skip_clean_files=[]): self.mapping = SoSMacMap() - super(SoSMacParser, self).__init__(config) + super(SoSMacParser, self).__init__(config, skip_clean_files) def reduce_mac_match(self, match): """Strips away leading and trailing non-alphanum characters from any diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py index 5909f52d39..c999ff55ef 100644 --- a/sos/cleaner/parsers/username_parser.py +++ b/sos/cleaner/parsers/username_parser.py @@ -26,9 +26,9 @@ class 
SoSUsernameParser(SoSCleanerParser): map_file_key = 'username_map' regex_patterns = [] - def __init__(self, config): + def __init__(self, config, skip_clean_files=[]): self.mapping = SoSUsernameMap() - super(SoSUsernameParser, self).__init__(config) + super(SoSUsernameParser, self).__init__(config, skip_clean_files) def _parse_line(self, line): return line, 0 diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py index ff0c1ab7c2..cbefad7d3d 100644 --- a/sos/collector/__init__.py +++ b/sos/collector/__init__.py @@ -87,6 +87,7 @@ class SoSCollector(SoSComponent): 'group': None, 'image': '', 'force_pull_image': True, + 'skip_clean_files': [], 'jobs': 4, 'journal_size': 0, 'keywords': [], @@ -483,6 +484,10 @@ def add_parser_options(cls, parser): default=[], dest='disable_parsers', help=('Disable specific parsers, so that ' 'those elements are not obfuscated')) + cleaner_grp.add_argument('--skip-clean-files', action='extend', + default=[], dest='skip_clean_files', + help=('List of files to skip/ignore during ' + 'cleaning. Asterisks are supported.')) cleaner_grp.add_argument('--keywords', action='extend', default=[], dest='keywords', help='List of keywords to obfuscate') diff --git a/sos/report/__init__.py b/sos/report/__init__.py index 77087ed19c..e3019c5827 100644 --- a/sos/report/__init__.py +++ b/sos/report/__init__.py @@ -88,6 +88,7 @@ class SoSReport(SoSComponent): 'desc': '', 'domains': [], 'disable_parsers': [], + 'skip_clean_files': [], 'dry_run': False, 'estimate_only': False, 'experimental': False, @@ -358,6 +359,10 @@ def add_parser_options(cls, parser): default=[], dest='disable_parsers', help=('Disable specific parsers, so that ' 'those elements are not obfuscated')) + cleaner_grp.add_argument('--skip-clean-files', action='extend', + default=[], dest='skip_clean_files', + help=('List of files to skip/ignore during ' + 'cleaning. 
Asterisks are supported.')) cleaner_grp.add_argument('--keywords', action='extend', default=[], dest='keywords', help='List of keywords to obfuscate')