Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[cleaner] Add option to skip cleaning files #3520

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions man/en/sos-clean.1
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ sos clean - Obfuscate sensitive data from one or more sosreports
.B sos clean TARGET [options]
[\-\-domains]
[\-\-disable-parsers]
[\-\-skip-cleaning-files|\-\-skip-masking-files]
[\-\-keywords]
[\-\-keyword-file]
[\-\-map-file]
Expand Down Expand Up @@ -63,6 +64,15 @@ trust in the party/parties that may handle the generated report.
Valid values for this option are currently: \fBhostname\fR, \fBip\fR, \fBipv6\fR,
\fBmac\fR, \fBkeyword\fR, and \fBusername\fR.
.TP
.B \-\-skip-cleaning-files, \-\-skip-masking-files FILES
Provide a comma-delimited list of files inside an archive that the cleaner should skip when cleaning.

Globs such as the asterisk are supported, so \fBsos_commands/host/hostname*\fR will match all three of the
usual filenames in that directory (\fBhostname\fR, \fBhostnamectl_status\fR and \fBhostname_-f\fR).

Use this option with caution, and only when you are certain that the given files do not contain any
sensitive information.
.TP
.B \-\-keywords KEYWORDS
Provide a comma-delimited list of keywords to scrub in addition to the default parsers.

Expand Down
33 changes: 22 additions & 11 deletions sos/cleaner/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import shutil
import sos.cleaner.preppers
import tempfile
import fnmatch

from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
Expand Down Expand Up @@ -81,6 +82,7 @@ class SoSCleaner(SoSComponent):
'archive_type': 'auto',
'domains': [],
'disable_parsers': [],
'skip_clean_files': [],
'jobs': 4,
'keywords': [],
'keyword_file': None,
Expand Down Expand Up @@ -116,7 +118,7 @@ def __init__(self, parser=None, args=None, cmdline=None, in_place=False,
# when obfuscating a SoSCollector run during archive extraction
os.makedirs(os.path.join(self.tmpdir, 'cleaner'), exist_ok=True)

self.validate_parser_values()
self.review_parser_values()

self.cleaner_mapping = self.load_map_file()
os.umask(0o77)
Expand All @@ -125,13 +127,14 @@ def __init__(self, parser=None, args=None, cmdline=None, in_place=False,

self.cleaner_md = self.manifest.components.add_section('cleaner')

skip_clean_files = self.opts.skip_clean_files
self.parsers = [
SoSHostnameParser(self.cleaner_mapping),
SoSIPParser(self.cleaner_mapping),
SoSIPv6Parser(self.cleaner_mapping),
SoSMacParser(self.cleaner_mapping),
SoSKeywordParser(self.cleaner_mapping),
SoSUsernameParser(self.cleaner_mapping)
SoSHostnameParser(self.cleaner_mapping, skip_clean_files),
SoSIPParser(self.cleaner_mapping, skip_clean_files),
SoSIPv6Parser(self.cleaner_mapping, skip_clean_files),
SoSMacParser(self.cleaner_mapping, skip_clean_files),
SoSKeywordParser(self.cleaner_mapping, skip_clean_files),
SoSUsernameParser(self.cleaner_mapping, skip_clean_files)
]

for _parser in self.opts.disable_parsers:
Expand Down Expand Up @@ -262,6 +265,11 @@ def add_parser_options(cls, parser):
default=[], dest='disable_parsers',
help=('Disable specific parsers, so that those '
'elements are not obfuscated'))
clean_grp.add_argument('--skip-cleaning-files', '--skip-masking-files',
action='extend', default=[],
dest='skip_clean_files',
help=('List of files to skip/ignore during '
'cleaning. Globs are supported.'))
clean_grp.add_argument('-j', '--jobs', default=4, type=int,
help='Number of concurrent archives to clean')
clean_grp.add_argument('--keywords', action='extend', default=[],
Expand Down Expand Up @@ -323,17 +331,20 @@ def inspect_target_archive(self):
if self.nested_archive:
self.nested_archive.ui_name = self.nested_archive.description

def validate_parser_values(self):
"""Check any values passed to the parsers via the commandline, e.g.
the --domains option, to ensure that they are valid for the parser in
question.
def review_parser_values(self):
"""Check any values passed to the parsers via the commandline:
- For the --domains option, ensure that they are valid for the parser
in question.
- Convert --skip-cleaning-files from globs to regular expressions.
"""
for _dom in self.opts.domains:
if len(_dom.split('.')) < 2:
raise Exception(
f"Invalid value '{_dom}' given: --domains values must be "
"actual domains"
)
self.opts.skip_clean_files = [fnmatch.translate(p) for p in
self.opts.skip_clean_files]

def execute(self):
"""SoSCleaner will begin by inspecting the TARGET option to determine
Expand Down
1 change: 0 additions & 1 deletion sos/cleaner/archives/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ class SoSObfuscationArchive():
type_name = 'undetermined'
description = 'undetermined'
is_nested = False
skip_files = []
prep_files = {}

def __init__(self, archive_path, tmpdir):
Expand Down
14 changes: 8 additions & 6 deletions sos/cleaner/parsers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,22 +42,24 @@ class SoSCleanerParser():
name = 'Undefined Parser'
regex_patterns = []
skip_line_patterns = []
skip_files = []
parser_skip_files = [] # list of skip files relevant to a parser
skip_clean_files = [] # list of global skip files from cmdline arguments
map_file_key = 'unset'
compile_regexes = True

def __init__(self, config={}):
def __init__(self, config={}, skip_clean_files=[]):
if self.map_file_key in config:
self.mapping.conf_update(config[self.map_file_key])
self.skip_clean_files = skip_clean_files
self._generate_skip_regexes()

def _generate_skip_regexes(self):
"""Generate the regexes for the parser's configured `skip_files`,
so that we don't regenerate them on every file being examined for if
the parser should skip a given file.
"""Generate the regexes for the parser's configured parser_skip_files
and the global skip_clean_files once, so that we don't regenerate them
for every file being checked to see whether the parser should skip it.
"""
self.skip_patterns = []
for p in self.skip_files:
for p in self.parser_skip_files + self.skip_clean_files:
self.skip_patterns.append(re.compile(p))

def generate_item_regexes(self):
Expand Down
4 changes: 2 additions & 2 deletions sos/cleaner/parsers/hostname_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ class SoSHostnameParser(SoSCleanerParser):
r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}(\b|_)))'
]

def __init__(self, config):
def __init__(self, config, skip_clean_files=[]):
self.mapping = SoSHostnameMap()
super(SoSHostnameParser, self).__init__(config)
super(SoSHostnameParser, self).__init__(config, skip_clean_files)

def parse_line(self, line):
"""This will be called for every line in every file we process, so that
Expand Down
6 changes: 3 additions & 3 deletions sos/cleaner/parsers/ip_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class SoSIPParser(SoSCleanerParser):
r'.*dnf\[.*\]:'
]

skip_files = [
parser_skip_files = [
# skip these as version numbers will frequently look like IP addresses
# when using regex matching
'installed-debs',
Expand All @@ -44,6 +44,6 @@ class SoSIPParser(SoSCleanerParser):
map_file_key = 'ip_map'
compile_regexes = False

def __init__(self, config):
def __init__(self, config, skip_clean_files=[]):
self.mapping = SoSIPMap()
super(SoSIPParser, self).__init__(config)
super(SoSIPParser, self).__init__(config, skip_clean_files)
6 changes: 3 additions & 3 deletions sos/cleaner/parsers/ipv6_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,15 @@ class SoSIPv6Parser(SoSCleanerParser):
r"(([0-9a-f]{1,4}(:[0-9a-f]{0,4}){0,5}))([^.])::(([0-9a-f]{1,4}"
r"(:[0-9a-f]{1,4}){0,5})?))(/\d{1,3})?(?![:\\a-z0-9])"
]
skip_files = [
parser_skip_files = [
'etc/dnsmasq.conf.*',
'.*modinfo.*',
]
compile_regexes = False

def __init__(self, config):
def __init__(self, config, skip_clean_files=[]):
self.mapping = SoSIPv6Map()
super(SoSIPv6Parser, self).__init__(config)
super(SoSIPv6Parser, self).__init__(config, skip_clean_files)

def get_map_contents(self):
"""Structure the dataset contents properly so that they can be reloaded
Expand Down
4 changes: 2 additions & 2 deletions sos/cleaner/parsers/keyword_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ class SoSKeywordParser(SoSCleanerParser):
name = 'Keyword Parser'
map_file_key = 'keyword_map'

def __init__(self, config):
def __init__(self, config, skip_clean_files=[]):
self.mapping = SoSKeywordMap()
super(SoSKeywordParser, self).__init__(config)
super(SoSKeywordParser, self).__init__(config, skip_clean_files)

def _parse_line(self, line):
return line, 0
6 changes: 3 additions & 3 deletions sos/cleaner/parsers/mac_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,15 @@ class SoSMacParser(SoSCleanerParser):
'53:4f:53',
'534f:53'
)
skip_files = [
parser_skip_files = [
'sos_commands/.*/modinfo.*'
]
map_file_key = 'mac_map'
compile_regexes = False

def __init__(self, config):
def __init__(self, config, skip_clean_files=[]):
self.mapping = SoSMacMap()
super(SoSMacParser, self).__init__(config)
super(SoSMacParser, self).__init__(config, skip_clean_files)

def reduce_mac_match(self, match):
"""Strips away leading and trailing non-alphanum characters from any
Expand Down
4 changes: 2 additions & 2 deletions sos/cleaner/parsers/username_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ class SoSUsernameParser(SoSCleanerParser):
map_file_key = 'username_map'
regex_patterns = []

def __init__(self, config):
def __init__(self, config, skip_clean_files=[]):
self.mapping = SoSUsernameMap()
super(SoSUsernameParser, self).__init__(config)
super(SoSUsernameParser, self).__init__(config, skip_clean_files)

def _parse_line(self, line):
return line, 0
6 changes: 6 additions & 0 deletions sos/collector/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class SoSCollector(SoSComponent):
'group': None,
'image': '',
'force_pull_image': True,
'skip_clean_files': [],
'jobs': 4,
'journal_size': 0,
'keywords': [],
Expand Down Expand Up @@ -483,6 +484,11 @@ def add_parser_options(cls, parser):
default=[], dest='disable_parsers',
help=('Disable specific parsers, so that '
'those elements are not obfuscated'))
cleaner_grp.add_argument('--skip-cleaning-files',
'--skip-masking-files', action='extend',
default=[], dest='skip_clean_files',
help=('List of files to skip/ignore during '
'cleaning. Globs are supported.'))
cleaner_grp.add_argument('--keywords', action='extend', default=[],
dest='keywords',
help='List of keywords to obfuscate')
Expand Down
6 changes: 6 additions & 0 deletions sos/report/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ class SoSReport(SoSComponent):
'desc': '',
'domains': [],
'disable_parsers': [],
'skip_clean_files': [],
'dry_run': False,
'estimate_only': False,
'experimental': False,
Expand Down Expand Up @@ -358,6 +359,11 @@ def add_parser_options(cls, parser):
default=[], dest='disable_parsers',
help=('Disable specific parsers, so that '
'those elements are not obfuscated'))
cleaner_grp.add_argument('--skip-cleaning-files',
'--skip-masking-files', action='extend',
default=[], dest='skip_clean_files',
help=('List of files to skip/ignore during '
'cleaning. Globs are supported.'))
cleaner_grp.add_argument('--keywords', action='extend', default=[],
dest='keywords',
help='List of keywords to obfuscate')
Expand Down
14 changes: 11 additions & 3 deletions tests/cleaner_tests/basic_function_tests/report_with_mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,15 @@ def test_perms_unchanged_on_modified_file(self):
self.assertEqual(imode_orig, imode_obfuscated)


class ReportWithCleanedKeywords(StageOneReportTest):
"""Testing for obfuscated keywords provided by the user
class ReportWithUserCustomisations(StageOneReportTest):
"""Testing for 1) obfuscated keywords provided by the user (--keywords option),
and 2) skipping the cleaning of specific files (--skip-cleaning-files option)

:avocado: tags=stageone
"""

sos_cmd = '--clean -o filesys,kernel --keywords=fstab,Linux,tmp --no-update'
sos_cmd = '--clean -o filesys,kernel --keywords=fstab,Linux,tmp,BOOT_IMAGE,fs.dentry-state \
--skip-cleaning-files proc/cmdline,sos_commands/*/sysctl* --no-update'

# Will the 'tmp' be properly treated in path to working dir without raising an error?
# To make this test effective, we assume the test runs on a system / with Policy
Expand All @@ -96,6 +98,12 @@ def test_filename_obfuscated(self):
def test_keyword_obfuscated_in_file(self):
self.assertFileNotHasContent('sos_commands/kernel/uname_-a', 'Linux')

def test_skip_cleaning_single_file(self):
self.assertFileHasContent('proc/cmdline', 'BOOT_IMAGE')

def test_skip_cleaning_glob_file(self):
self.assertFileHasContent('sos_commands/kernel/sysctl_-a', 'fs.dentry-state')


class DefaultRemoveBinaryFilesTest(StageTwoReportTest):
"""Testing that binary files are removed by default
Expand Down
Loading