Skip to content

Commit

Permalink
[cleaner] Add option to skip cleaning files
Browse files Browse the repository at this point in the history
A new option, --skip-clean-files, allows the cleaner to skip files
that the user is certain contain no sensitive information.

The option supports globs / wildcards.

Relevant: #3469
Closes: #3520

Signed-off-by: Pavel Moravec <pmoravec@redhat.com>
  • Loading branch information
pmoravec committed Feb 12, 2024
1 parent db3f4cf commit fdb4ec8
Show file tree
Hide file tree
Showing 12 changed files with 64 additions and 33 deletions.
10 changes: 10 additions & 0 deletions man/en/sos-clean.1
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ sos clean - Obfuscate sensitive data from one or more sosreports
.B sos clean TARGET [options]
[\-\-domains]
[\-\-disable-parsers]
[\-\-skip-clean-files]
[\-\-keywords]
[\-\-keyword-file]
[\-\-map-file]
Expand Down Expand Up @@ -63,6 +64,15 @@ trust in the party/parties that may handle the generated report.
Valid values for this option are currently: \fBhostname\fR, \fBip\fR, \fBipv6\fR,
\fBmac\fR, \fBkeyword\fR, and \fBusername\fR.
.TP
.B \-\-skip-clean-files FILES
Provide a comma-delimited list of files inside an archive that the cleaner should skip when cleaning.

Globs (wildcards such as the asterisk) are supported, so \fBsos_commands/host/hostname*\fR will match all three
usual filenames in that directory (\fBhostname\fR, \fBhostnamectl_status\fR and \fBhostname_-f\fR).

Use this option with caution, and only when you are certain that the given files do not contain any
sensitive information.
.TP
.B \-\-keywords KEYWORDS
Provide a comma-delimited list of keywords to scrub in addition to the default parsers.

Expand Down
32 changes: 21 additions & 11 deletions sos/cleaner/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import shutil
import sos.cleaner.preppers
import tempfile
import fnmatch

from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
Expand Down Expand Up @@ -81,6 +82,7 @@ class SoSCleaner(SoSComponent):
'archive_type': 'auto',
'domains': [],
'disable_parsers': [],
'skip_clean_files': [],
'jobs': 4,
'keywords': [],
'keyword_file': None,
Expand Down Expand Up @@ -116,7 +118,7 @@ def __init__(self, parser=None, args=None, cmdline=None, in_place=False,
# when obfuscating a SoSCollector run during archive extraction
os.makedirs(os.path.join(self.tmpdir, 'cleaner'), exist_ok=True)

self.validate_parser_values()
self.review_parser_values()

self.cleaner_mapping = self.load_map_file()
os.umask(0o77)
Expand All @@ -125,13 +127,14 @@ def __init__(self, parser=None, args=None, cmdline=None, in_place=False,

self.cleaner_md = self.manifest.components.add_section('cleaner')

skip_clean_files = self.opts.skip_clean_files
self.parsers = [
SoSHostnameParser(self.cleaner_mapping),
SoSIPParser(self.cleaner_mapping),
SoSIPv6Parser(self.cleaner_mapping),
SoSMacParser(self.cleaner_mapping),
SoSKeywordParser(self.cleaner_mapping),
SoSUsernameParser(self.cleaner_mapping)
SoSHostnameParser(self.cleaner_mapping, skip_clean_files),
SoSIPParser(self.cleaner_mapping, skip_clean_files),
SoSIPv6Parser(self.cleaner_mapping, skip_clean_files),
SoSMacParser(self.cleaner_mapping, skip_clean_files),
SoSKeywordParser(self.cleaner_mapping, skip_clean_files),
SoSUsernameParser(self.cleaner_mapping, skip_clean_files)
]

for _parser in self.opts.disable_parsers:
Expand Down Expand Up @@ -262,6 +265,10 @@ def add_parser_options(cls, parser):
default=[], dest='disable_parsers',
help=('Disable specific parsers, so that those '
'elements are not obfuscated'))
clean_grp.add_argument('--skip-clean-files', action='extend',
default=[], dest='skip_clean_files',
help=('List of files to skip/ignore during '
'cleaning. Asterisks are supported.'))
clean_grp.add_argument('-j', '--jobs', default=4, type=int,
help='Number of concurrent archives to clean')
clean_grp.add_argument('--keywords', action='extend', default=[],
Expand Down Expand Up @@ -323,17 +330,20 @@ def inspect_target_archive(self):
if self.nested_archive:
self.nested_archive.ui_name = self.nested_archive.description

def validate_parser_values(self):
"""Check any values passed to the parsers via the commandline, e.g.
the --domains option, to ensure that they are valid for the parser in
question.
def review_parser_values(self):
"""Check any values passed to the parsers via the commandline:
- For the --domains option, ensure that they are valid for the parser
in question.
- Convert --skip-clean-files from globs to regular expressions.
"""
for _dom in self.opts.domains:
if len(_dom.split('.')) < 2:
raise Exception(
f"Invalid value '{_dom}' given: --domains values must be "
"actual domains"
)
self.opts.skip_clean_files = [fnmatch.translate(p) for p in
self.opts.skip_clean_files]

def execute(self):
"""SoSCleaner will begin by inspecting the TARGET option to determine
Expand Down
1 change: 0 additions & 1 deletion sos/cleaner/archives/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ class SoSObfuscationArchive():
type_name = 'undetermined'
description = 'undetermined'
is_nested = False
skip_files = []
prep_files = {}

def __init__(self, archive_path, tmpdir):
Expand Down
14 changes: 8 additions & 6 deletions sos/cleaner/parsers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,22 +42,24 @@ class SoSCleanerParser():
name = 'Undefined Parser'
regex_patterns = []
skip_line_patterns = []
skip_files = []
parser_skip_files = [] # list of skip files relevant to a parser
skip_clean_files = [] # list of global skip files from cmdline arguments
map_file_key = 'unset'
compile_regexes = True

def __init__(self, config={}):
def __init__(self, config={}, skip_clean_files=[]):
if self.map_file_key in config:
self.mapping.conf_update(config[self.map_file_key])
self.skip_clean_files = skip_clean_files
self._generate_skip_regexes()

def _generate_skip_regexes(self):
"""Generate the regexes for the parser's configured `skip_files`,
so that we don't regenerate them on every file being examined for if
the parser should skip a given file.
"""Generate the regexes for the parser's configured parser_skip_files
or global skip_clean_files, so that we don't regenerate them on every
file being examined for if the parser should skip a given file.
"""
self.skip_patterns = []
for p in self.skip_files:
for p in self.parser_skip_files + self.skip_clean_files:
self.skip_patterns.append(re.compile(p))

def generate_item_regexes(self):
Expand Down
4 changes: 2 additions & 2 deletions sos/cleaner/parsers/hostname_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ class SoSHostnameParser(SoSCleanerParser):
r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}(\b|_)))'
]

def __init__(self, config):
def __init__(self, config, skip_clean_files=[]):
self.mapping = SoSHostnameMap()
super(SoSHostnameParser, self).__init__(config)
super(SoSHostnameParser, self).__init__(config, skip_clean_files)

def parse_line(self, line):
"""This will be called for every line in every file we process, so that
Expand Down
6 changes: 3 additions & 3 deletions sos/cleaner/parsers/ip_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class SoSIPParser(SoSCleanerParser):
r'.*dnf\[.*\]:'
]

skip_files = [
parser_skip_files = [
# skip these as version numbers will frequently look like IP addresses
# when using regex matching
'installed-debs',
Expand All @@ -44,6 +44,6 @@ class SoSIPParser(SoSCleanerParser):
map_file_key = 'ip_map'
compile_regexes = False

def __init__(self, config):
def __init__(self, config, skip_clean_files=[]):
self.mapping = SoSIPMap()
super(SoSIPParser, self).__init__(config)
super(SoSIPParser, self).__init__(config, skip_clean_files)
6 changes: 3 additions & 3 deletions sos/cleaner/parsers/ipv6_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,15 @@ class SoSIPv6Parser(SoSCleanerParser):
r"(([0-9a-f]{1,4}(:[0-9a-f]{0,4}){0,5}))([^.])::(([0-9a-f]{1,4}"
r"(:[0-9a-f]{1,4}){0,5})?))(/\d{1,3})?(?![:\\a-z0-9])"
]
skip_files = [
parser_skip_files = [
'etc/dnsmasq.conf.*',
'.*modinfo.*',
]
compile_regexes = False

def __init__(self, config):
def __init__(self, config, skip_clean_files=[]):
self.mapping = SoSIPv6Map()
super(SoSIPv6Parser, self).__init__(config)
super(SoSIPv6Parser, self).__init__(config, skip_clean_files)

def get_map_contents(self):
"""Structure the dataset contents properly so that they can be reloaded
Expand Down
4 changes: 2 additions & 2 deletions sos/cleaner/parsers/keyword_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ class SoSKeywordParser(SoSCleanerParser):
name = 'Keyword Parser'
map_file_key = 'keyword_map'

def __init__(self, config):
def __init__(self, config, skip_clean_files=[]):
self.mapping = SoSKeywordMap()
super(SoSKeywordParser, self).__init__(config)
super(SoSKeywordParser, self).__init__(config, skip_clean_files)

def _parse_line(self, line):
return line, 0
6 changes: 3 additions & 3 deletions sos/cleaner/parsers/mac_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,15 @@ class SoSMacParser(SoSCleanerParser):
'53:4f:53',
'534f:53'
)
skip_files = [
parser_skip_files = [
'sos_commands/.*/modinfo.*'
]
map_file_key = 'mac_map'
compile_regexes = False

def __init__(self, config):
def __init__(self, config, skip_clean_files=[]):
self.mapping = SoSMacMap()
super(SoSMacParser, self).__init__(config)
super(SoSMacParser, self).__init__(config, skip_clean_files)

def reduce_mac_match(self, match):
"""Strips away leading and trailing non-alphanum characters from any
Expand Down
4 changes: 2 additions & 2 deletions sos/cleaner/parsers/username_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ class SoSUsernameParser(SoSCleanerParser):
map_file_key = 'username_map'
regex_patterns = []

def __init__(self, config):
def __init__(self, config, skip_clean_files=[]):
self.mapping = SoSUsernameMap()
super(SoSUsernameParser, self).__init__(config)
super(SoSUsernameParser, self).__init__(config, skip_clean_files)

def _parse_line(self, line):
return line, 0
5 changes: 5 additions & 0 deletions sos/collector/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class SoSCollector(SoSComponent):
'group': None,
'image': '',
'force_pull_image': True,
'skip_clean_files': [],
'jobs': 4,
'journal_size': 0,
'keywords': [],
Expand Down Expand Up @@ -483,6 +484,10 @@ def add_parser_options(cls, parser):
default=[], dest='disable_parsers',
help=('Disable specific parsers, so that '
'those elements are not obfuscated'))
cleaner_grp.add_argument('--skip-clean-files', action='extend',
default=[], dest='skip_clean_files',
help=('List of files to skip/ignore during '
'cleaning. Asterisks are supported.'))
cleaner_grp.add_argument('--keywords', action='extend', default=[],
dest='keywords',
help='List of keywords to obfuscate')
Expand Down
5 changes: 5 additions & 0 deletions sos/report/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ class SoSReport(SoSComponent):
'desc': '',
'domains': [],
'disable_parsers': [],
'skip_clean_files': [],
'dry_run': False,
'estimate_only': False,
'experimental': False,
Expand Down Expand Up @@ -358,6 +359,10 @@ def add_parser_options(cls, parser):
default=[], dest='disable_parsers',
help=('Disable specific parsers, so that '
'those elements are not obfuscated'))
cleaner_grp.add_argument('--skip-clean-files', action='extend',
default=[], dest='skip_clean_files',
help=('List of files to skip/ignore during '
'cleaning. Asterisks are supported.'))
cleaner_grp.add_argument('--keywords', action='extend', default=[],
dest='keywords',
help='List of keywords to obfuscate')
Expand Down

0 comments on commit fdb4ec8

Please sign in to comment.