From 3fb3b5beeffefb5728ae50c6d8cac93a1f8f8559 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Mon, 5 Nov 2018 12:11:57 +0000 Subject: [PATCH 1/7] validate_docstrings.py to exit with status code as the number of errors (so, 0 for no errors) --- scripts/tests/test_validate_docstrings.py | 98 ++++++++++++++++------- scripts/validate_docstrings.py | 33 +++++--- 2 files changed, 91 insertions(+), 40 deletions(-) diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index a3feee6552178..fba0910f1e080 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -1,14 +1,14 @@ -import string -import random import io +import os +import random +import string +import textwrap import pytest import numpy as np - +from pandas.util.testing import capture_stderr import validate_docstrings validate_one = validate_docstrings.validate_one -from pandas.util.testing import capture_stderr - class GoodDocStrings(object): """ @@ -779,40 +779,40 @@ def test_bad_examples(self, capsys, klass, func, msgs): class ApiItems(object): @property def api_doc(self): - return io.StringIO(''' -.. currentmodule:: itertools + return textwrap.dedent(io.StringIO(''' + .. currentmodule:: itertools -Itertools ---------- + Itertools + --------- -Infinite -~~~~~~~~ + Infinite + ~~~~~~~~ -.. autosummary:: + .. autosummary:: - cycle - count + cycle + count -Finite -~~~~~~ + Finite + ~~~~~~ -.. autosummary:: + .. autosummary:: - chain + chain -.. currentmodule:: random + .. currentmodule:: random -Random ------- + Random + ------ -All -~~~ + All + ~~~ -.. autosummary:: + .. autosummary:: - seed - randint -''') + seed + randint + ''')) @pytest.mark.parametrize('idx,name', [(0, 'itertools.cycle'), (1, 'itertools.count'), @@ -850,3 +850,47 @@ def test_item_section(self, idx, section): def test_item_subsection(self, idx, subsection): result = list(validate_docstrings.get_api_items(self.api_doc)) assert result[idx][3] == subsection + + +def test_num_errors_for_validate_one(monkeypatch): + monkeypatch.setattr(validate_docstrings, 'validate_one', + lambda func_name: {'docstring': 'docstring1', + 'errors': ['err1', 'err2', 'err3'], + 'warnings': [], + 'examples_errors': ''}) + with open(os.devnull, 'w') as devnull: + num_errors = validate_docstrings.main('docstring1', devnull) + assert num_errors == 3 + + +def test_no_num_errors_for_validate_one(monkeypatch): + monkeypatch.setattr(validate_docstrings, 'validate_one', + lambda func_name: {'docstring': 'docstring1', + 'errors': [], + 'warnings': ['warn1'], + 'examples_errors': ''}) + with open(os.devnull, 'w') as devnull: + num_errors = validate_docstrings.main('docstring1', devnull) + assert num_errors == 0 + + +def test_num_errors_for_validate_all(monkeypatch): + monkeypatch.setattr(validate_docstrings, 'validate_all', + lambda: {'docstring1': {'errors': ['err1', + 'err2', + 'err3']}, + 'docstring2': {'errors': ['err4', + 'err5']}}) + with open(os.devnull, 'w') as devnull: + num_errors = validate_docstrings.main(None, devnull) + assert num_errors == 5 + + +def test_no_num_errors_for_validate_all(monkeypatch): + monkeypatch.setattr(validate_docstrings, 'validate_all', + lambda: {'docstring1': {'errors': [], + 'warnings': ['warn1']}, + 'docstring2': {'errors': []}}) + with open(os.devnull, 'w') as devnull: + num_errors = validate_docstrings.main(None, devnull) + assert num_errors == 0 diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index ef6465c3e988d..02cf29096074f 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -23,6 +23,7 @@ import pydoc import inspect import importlib +import itertools import doctest import tempfile @@ -644,29 +645,35 @@ def header(title, width=80, char='#'): full_line=full_line, title_line=title_line) if func_name is None: - json_doc = validate_all() - fd.write(json.dumps(json_doc)) + result = validate_all() + fd.write(json.dumps(result)) + errors = itertools.chain(*(doc['errors'] for doc in result.values())) + num_errors = sum(1 for err in errors) else: - doc_info = validate_one(func_name) + result = validate_one(func_name) fd.write(header('Docstring ({})'.format(func_name))) - fd.write('{}\n'.format(doc_info['docstring'])) + fd.write('{}\n'.format(result['docstring'])) fd.write(header('Validation')) - if doc_info['errors']: - fd.write('{} Errors found:\n'.format(len(doc_info['errors']))) - for err in doc_info['errors']: + if result['errors']: + fd.write('{} Errors found:\n'.format(len(result['errors']))) + for err in result['errors']: fd.write('\t{}\n'.format(err)) - if doc_info['warnings']: - fd.write('{} Warnings found:\n'.format(len(doc_info['warnings']))) - for wrn in doc_info['warnings']: + if result['warnings']: + fd.write('{} Warnings found:\n'.format(len(result['warnings']))) + for wrn in result['warnings']: fd.write('\t{}\n'.format(wrn)) - if not doc_info['errors']: + if not result['errors']: fd.write('Docstring for "{}" correct. :)\n'.format(func_name)) - if doc_info['examples_errors']: + if result['examples_errors']: fd.write(header('Doctests')) - fd.write(doc_info['examples_errors']) + fd.write(result['examples_errors']) + + num_errors = len(result['errors']) + + return num_errors if __name__ == '__main__': From 99f0dd33493d7b8913952cc422c1d4aaec267a42 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Mon, 5 Nov 2018 16:10:42 +0000 Subject: [PATCH 2/7] Implemented different output types for the validate_all, and a prefix to filter which docstrings are validated --- scripts/validate_docstrings.py | 90 +++++++++++++++++++++++++++------- 1 file changed, 73 insertions(+), 17 deletions(-) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 02cf29096074f..933424e85311b 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -416,6 +416,8 @@ def validate_pep8(self): if not self.examples: return + # F401 is needed to not generate flake8 errors in examples + # that do not user numpy or pandas content = ''.join(('import numpy as np # noqa: F401\n', 'import pandas as pd # noqa: F401\n', *self.examples_source_code)) @@ -587,11 +589,17 @@ def validate_one(func_name): 'examples_errors': examples_errs} -def validate_all(): +def validate_all(prefix): """ Execute the validation of all docstrings, and return a dict with the results. + Parameters + ---------- + prefix : str, optional + If provided, only the docstrings that start with this pattern will be + validated. + Returns ------- dict @@ -606,6 +614,8 @@ def validate_all(): with open(api_doc_fname) as f: api_items = list(get_api_items(f)) for func_name, func_obj, section, subsection in api_items: + if prefix and not func_name.startswith(prefix): + continue doc_info = validate_one(func_name) result[func_name] = doc_info @@ -625,6 +635,8 @@ def validate_all(): func_name = 'pandas.{}.{}'.format(class_.__name__, member[0]) if (not member[0].startswith('_') and func_name not in api_item_names): + if prefix and not func_name.startswith(prefix): + continue doc_info = validate_one(func_name) result[func_name] = doc_info result[func_name]['in_api'] = False @@ -632,7 +644,7 @@ def validate_all(): return result -def main(func_name, fd): +def main(func_name, prefix, output_format): def header(title, width=80, char='#'): full_line = char * width side_len = (width - len(title) - 2) // 2 @@ -645,38 +657,68 @@ def header(title, width=80, char='#'): full_line=full_line, title_line=title_line) if func_name is None: - result = validate_all() - fd.write(json.dumps(result)) + result = validate_all(prefix) errors = itertools.chain(*(doc['errors'] for doc in result.values())) num_errors = sum(1 for err in errors) + + if output_format == 'json': + output = json.dumps(result) + else: + if output_format == 'default': + output_format = '{text}\n' + elif output_format == 'azure': + output_format = ('##vso[task.logissue type=error;' + 'sourcepath={path};' + 'linenumber={row};' + 'code={code};' + ']{text}\n') + else: + raise ValueError('Unknown output_format "{}"'.format( + output_format)) + + output = '' + for name, res in result.items(): + for error in res['errors']: + output += output_format.format(name=name, + path=res['file'], + row=res['file_line'], + code='ERR', + text='{}: {}'.format(name, + error)) + + sys.stderr.write(output) + else: result = validate_one(func_name) + num_errors = len(result['errors']) - fd.write(header('Docstring ({})'.format(func_name))) - fd.write('{}\n'.format(result['docstring'])) - fd.write(header('Validation')) + sys.stderr.write(header('Docstring ({})'.format(func_name))) + sys.stderr.write('{}\n'.format(result['docstring'])) + sys.stderr.write(header('Validation')) if result['errors']: - fd.write('{} Errors found:\n'.format(len(result['errors']))) + sys.stderr.write('{} Errors found:\n'.format( + len(result['errors']))) for err in result['errors']: - fd.write('\t{}\n'.format(err)) + sys.stderr.write('\t{}\n'.format(err)) if result['warnings']: - fd.write('{} Warnings found:\n'.format(len(result['warnings']))) + sys.stderr.write('{} Warnings found:\n'.format( + len(result['warnings']))) for wrn in result['warnings']: - fd.write('\t{}\n'.format(wrn)) + sys.stderr.write('\t{}\n'.format(wrn)) if not result['errors']: - fd.write('Docstring for "{}" correct. :)\n'.format(func_name)) + sys.stderr.write('Docstring for "{}" correct. :)\n'.format( + func_name)) if result['examples_errors']: - fd.write(header('Doctests')) - fd.write(result['examples_errors']) - - num_errors = len(result['errors']) + sys.stderr.write(header('Doctests')) + sys.stderr.write(result['examples_errors']) return num_errors if __name__ == '__main__': + format_opts = 'default', 'json', 'azure' func_help = ('function or method to validate (e.g. pandas.DataFrame.head) ' 'if not provided, all docstrings are validated and returned ' 'as JSON') @@ -686,5 +728,19 @@ def header(title, width=80, char='#'): nargs='?', default=None, help=func_help) + argparser.add_argument('--format', default='default', help='format of the ' + 'output when validating multiple docstrings ' + '(ignored when validating one). ' + 'It can be {}'.format(str(format_opts)[1:-1])) + argparser.add_argument('--prefix', default=None, help='pattern for the ' + 'docstring names, in order to decide which ones ' + 'will be validated. A prefix "pandas.Series.str.' + 'will make the script validate all the docstrings' + 'of methods starting by this pattern. It is ' + 'ignored if parameter function is provided') + args = argparser.parse_args() - sys.exit(main(args.function, sys.stdout)) + if args.format not in format_opts: + raise ValueError('--format argument must be one of {}'.format( + str(format_opts)[1:-1])) + sys.exit(main(args.function, args.prefix, args.format)) From fea94b51a5adbe5a5ae2fa9c76f8ab8f7204f1c7 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Mon, 5 Nov 2018 17:36:33 +0000 Subject: [PATCH 3/7] Codifying errors --- scripts/validate_docstrings.py | 213 +++++++++++++++++++-------------- 1 file changed, 121 insertions(+), 92 deletions(-) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 933424e85311b..db829b1749db5 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -323,16 +323,18 @@ def parameter_mismatches(self): doc_params = tuple(self.doc_parameters) missing = set(signature_params) - set(doc_params) if missing: - errs.append( - 'Parameters {} not documented'.format(pprint_thing(missing))) + errs.append(('PR01', + 'Parameters {} not documented'.format( + pprint_thing(missing)))) extra = set(doc_params) - set(signature_params) if extra: - errs.append('Unknown parameters {}'.format(pprint_thing(extra))) + errs.append(('PR02', 'Unknown parameters {}'.format( + pprint_thing(extra)))) if (not missing and not extra and signature_params != doc_params and not (not signature_params and not doc_params)): - errs.append('Wrong parameters order. ' + - 'Actual: {!r}. '.format(signature_params) + - 'Documented: {!r}'.format(doc_params)) + errs.append(('PR03', 'Wrong parameters order. ' + + 'Actual: {!r}. '.format(signature_params) + + 'Documented: {!r}'.format(doc_params))) return errs @@ -449,134 +451,157 @@ def validate_one(func_name): dict A dictionary containing all the information obtained from validating the docstring. + + Notes + ----- + The errors are codified in the next way: + - First two characters: Section where the error happens: + * GL: Global (no section, like section ordering errors) + * SS: Short summary + * ES: Extended summary + * PR: Parameters + * RT: Returns + * YD: Yields + * RS: Raises + * WN: Warns + * SA: See Also + * NT: Notes + * RF: References + * EX: Examples + - Last two characters: Numeric error code inside the section + + For example, RT02 is the second codified error in the Returns section. + + The exact codes are available in the source code of this function. """ doc = Docstring(func_name) errs = [] wrns = [] if doc.start_blank_lines != 1: - errs.append('Docstring text (summary) should start in the line ' - 'immediately after the opening quotes (not in the same ' - 'line, or leaving a blank line in between)') + errs.append(('GL01', 'Docstring text (summary) should start in the ' + 'line immediately after the opening quotes (not in the ' + 'same line, or leaving a blank line in between)')) if doc.end_blank_lines != 1: - errs.append('Closing quotes should be placed in the line after ' - 'the last text in the docstring (do not close the ' - 'quotes in the same line as the text, or leave a ' - 'blank line between the last text and the quotes)') + errs.append(('GL02', 'Closing quotes should be placed in the line ' + 'after the last text in the docstring (do not close the ' + 'quotes in the same line as the text, or leave a ' + 'blank line between the last text and the quotes)')) if doc.double_blank_lines: - errs.append('Use only one blank line to separate sections or ' - 'paragraphs') + errs.append(('GL03', 'Use only one blank line to separate sections or ' + 'paragraphs')) + mentioned_errs = doc.mentioned_private_classes + if mentioned_errs: + errs.append(('GL04', 'Private classes ({}) should not be mentioned in ' + 'public docstring.'.format(mentioned_errs))) + for line in doc.raw_doc.splitlines(): + if re.match("^ *\t", line): + errs.append(('GL05', 'Tabs found at the start of line "{}", ' + 'please use whitespace only'.format(line.lstrip()))) if not doc.summary: - errs.append('No summary found (a short summary in a single line ' - 'should be present at the beginning of the docstring)') + errs.append(('SS01', 'No summary found (a short summary in a single ' + 'line should be present at the beginning of the ' + 'docstring)')) else: if not doc.summary[0].isupper(): - errs.append('Summary does not start with a capital letter') + errs.append(('SS02', + 'Summary does not start with a capital letter')) if doc.summary[-1] != '.': - errs.append('Summary does not end with a period') + errs.append(('SS03', 'Summary does not end with a period')) if doc.summary != doc.summary.lstrip(): - errs.append('Summary contains heading whitespaces.') + errs.append(('SS04', 'Summary contains heading whitespaces')) elif (doc.is_function_or_method and doc.summary.split(' ')[0][-1] == 's'): - errs.append('Summary must start with infinitive verb, ' - 'not third person (e.g. use "Generate" instead of ' - '"Generates")') + errs.append(('SS05', 'Summary must start with infinitive verb, ' + 'not third person (e.g. use "Generate" instead of ' + '"Generates")')) if doc.num_summary_lines > 1: - errs.append("Summary should fit in a single line.") + errs.append(('SS06', "Summary should fit in a single line")) + if not doc.extended_summary: - wrns.append('No extended summary found') + wrns.append(('ES01', 'No extended summary found')) + + # PR01: Parameters not documented + # PR02: Unknown parameters + # PR03: Wrong parameters order + errs += doc.parameter_mismatches - param_errs = doc.parameter_mismatches for param in doc.doc_parameters: if not param.startswith("*"): # Check can ignore var / kwargs if not doc.parameter_type(param): - param_errs.append('Parameter "{}" has no type'.format(param)) + errs.append(('PR04', + 'Parameter "{}" has no type'.format(param))) else: if doc.parameter_type(param)[-1] == '.': - param_errs.append('Parameter "{}" type should ' - 'not finish with "."'.format(param)) + errs.append(('PR05', 'Parameter "{}" type should ' + 'not finish with "."'.format(param))) common_type_errors = [('integer', 'int'), ('boolean', 'bool'), ('string', 'str')] for incorrect_type, correct_type in common_type_errors: if incorrect_type in doc.parameter_type(param): - param_errs.append('Parameter "{}" type should use ' - '"{}" instead of "{}"' - .format(param, - correct_type, - incorrect_type)) + errs.append(('PR06', 'Parameter "{}" type ' + 'should use "{}" instead of "{}"' + .format(param, + correct_type, + incorrect_type))) if not doc.parameter_desc(param): - param_errs.append('Parameter "{}" ' - 'has no description'.format(param)) + errs.append(('PR07', 'Parameter "{}" ' + 'has no description'.format(param))) else: if not doc.parameter_desc(param)[0].isupper(): - param_errs.append('Parameter "{}" description ' - 'should start with a ' - 'capital letter'.format(param)) + errs.append(('PR08', 'Parameter "{}" description should start ' + 'with a capital letter'.format(param))) if doc.parameter_desc(param)[-1] != '.': - param_errs.append('Parameter "{}" description ' - 'should finish with "."'.format(param)) - if param_errs: - errs.append('Errors in parameters section') - for param_err in param_errs: - errs.append('\t{}'.format(param_err)) - - pep8_errs = list(doc.validate_pep8()) - if pep8_errs: - errs.append('Linting issues in doctests:') - for err in pep8_errs: - errs.append('\t{} {} {}'.format(err.count, err.error_code, - err.message)) + errs.append(('PR09', 'Parameter "{}" description ' + 'should finish with "."'.format(param))) if doc.is_function_or_method: if not doc.returns and "return" in doc.method_source: - errs.append('No Returns section found') + errs.append(('RT01', 'No Returns section found')) if not doc.yields and "yield" in doc.method_source: - errs.append('No Yields section found') - - mentioned_errs = doc.mentioned_private_classes - if mentioned_errs: - errs.append('Private classes ({}) should not be mentioned in public ' - 'docstring.'.format(mentioned_errs)) + errs.append(('YD01', 'No Yields section found')) if not doc.see_also: - wrns.append('See Also section not found') + wrns.append(('SA01', 'See Also section not found')) else: for rel_name, rel_desc in doc.see_also.items(): if rel_desc: if not rel_desc.endswith('.'): - errs.append('Missing period at end of description for ' - 'See Also "{}" reference'.format(rel_name)) + errs.append(('SA02', 'Missing period at end of ' + 'description for See Also "{}" ' + 'reference'.format(rel_name))) if not rel_desc[0].isupper(): - errs.append('Description should be capitalized for ' - 'See Also "{}" reference'.format(rel_name)) + errs.append(('SA03', + 'Description should be capitalized for See ' + 'Also "{}" reference'.format(rel_name))) else: - errs.append('Missing description for ' - 'See Also "{}" reference'.format(rel_name)) + errs.append(('SA04', 'Missing description for ' + 'See Also "{}" reference'.format(rel_name))) if rel_name.startswith('pandas.'): - errs.append('{} in `See Also` section does not ' - 'need `pandas` prefix, use {} instead.' - .format(rel_name, rel_name[len('pandas.'):])) - for line in doc.raw_doc.splitlines(): - if re.match("^ *\t", line): - errs.append('Tabs found at the start of line "{}", ' - 'please use whitespace only'.format(line.lstrip())) + errs.append(('SA05', '{} in `See Also` section does not ' + 'need `pandas` prefix, use {} instead.' + .format(rel_name, rel_name[len('pandas.'):]))) examples_errs = '' if not doc.examples: - wrns.append('No examples section found') + wrns.append(('EX01', 'No examples section found')) else: examples_errs = doc.examples_errors if examples_errs: - errs.append('Examples do not pass tests') + errs.append(('EX02', 'Examples do not pass tests:' + '\n{}'.format(examples_errs))) + for err in doc.validate_pep8(): + errs.append(('EX03', 'flake8 error: {} {}{}'.format( + err.error_code, err.message, + ' ({} times)'.format(err.count) if err.count > 1 else ''))) examples_source_code = ''.join(doc.examples_source_code) - if 'import numpy' in examples_source_code: - errs.append("numpy does not need to be imported in the examples, " - "as it's assumed to be already imported as np") - if 'import pandas' in examples_source_code: - errs.append("pandas does not need to be imported in the examples, " - "as it's assumed to be already imported as pd") + for wrong_import in ('import numpy', 'import pandas'): + if wrong_import in examples_source_code: + errs.append(('EX04', "Do not {}, as it's imported " + 'automatically for the examples (numpy as np, ' + 'pandas as pd)'.format(wrong_import))) return {'type': doc.type, 'docstring': doc.clean_doc, @@ -678,13 +703,13 @@ def header(title, width=80, char='#'): output = '' for name, res in result.items(): - for error in res['errors']: - output += output_format.format(name=name, - path=res['file'], - row=res['file_line'], - code='ERR', - text='{}: {}'.format(name, - error)) + for err_code, err_desc in res['errors']: + output += output_format.format( + name=name, + path=res['file'], + row=res['file_line'], + code=err_code, + text='{}: {}'.format(name, err_desc)) sys.stderr.write(output) @@ -698,13 +723,17 @@ def header(title, width=80, char='#'): if result['errors']: sys.stderr.write('{} Errors found:\n'.format( len(result['errors']))) - for err in result['errors']: - sys.stderr.write('\t{}\n'.format(err)) + for err_code, err_desc in result['errors']: + # Failing examples are printed at the end + if err_code == 'EX02': + sys.stderr.write('\tExamples do not pass tests\n') + continue + sys.stderr.write('\t{}\n'.format(err_desc)) if result['warnings']: sys.stderr.write('{} Warnings found:\n'.format( len(result['warnings']))) - for wrn in result['warnings']: - sys.stderr.write('\t{}\n'.format(wrn)) + for wrn_code, wrn_desc in result['warnings']: + sys.stderr.write('\t{}\n'.format(wrn_desc)) if not result['errors']: sys.stderr.write('Docstring for "{}" correct. :)\n'.format( From bbb7c214c23720b9a58092eb61ff1be86f2a5fec Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Mon, 5 Nov 2018 17:59:54 +0000 Subject: [PATCH 4/7] Adding --errors parameter to be able to validate only specific errors --- scripts/validate_docstrings.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index db829b1749db5..b565114d30e7b 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -669,7 +669,7 @@ def validate_all(prefix): return result -def main(func_name, prefix, output_format): +def main(func_name, prefix, errors, output_format): def header(title, width=80, char='#'): full_line = char * width side_len = (width - len(title) - 2) // 2 @@ -683,8 +683,6 @@ def header(title, width=80, char='#'): if func_name is None: result = validate_all(prefix) - errors = itertools.chain(*(doc['errors'] for doc in result.values())) - num_errors = sum(1 for err in errors) if output_format == 'json': output = json.dumps(result) @@ -701,9 +699,15 @@ def header(title, width=80, char='#'): raise ValueError('Unknown output_format "{}"'.format( output_format)) - output = '' + num_errors, output = 0, '' for name, res in result.items(): for err_code, err_desc in res['errors']: + # The script would be faster if instead of filtering the + # errors after validating them, it didn't validate them + # initially. But that would complicate the code too much + if errors and err_code not in errors: + continue + num_errors += 1 output += output_format.format( name=name, path=res['file'], @@ -767,9 +771,14 @@ def header(title, width=80, char='#'): 'will make the script validate all the docstrings' 'of methods starting by this pattern. It is ' 'ignored if parameter function is provided') + argparser.add_argument('--errors', default=None, help='comma separated ' + 'list of error codes to validate. By default it ' + 'validates all errors (ignored when validating ' + 'a single docstring)') args = argparser.parse_args() if args.format not in format_opts: raise ValueError('--format argument must be one of {}'.format( str(format_opts)[1:-1])) - sys.exit(main(args.function, args.prefix, args.format)) + sys.exit(main(args.function, args.prefix, args.errors.split(','), + args.format)) From bb595f006d996bd992b0fc66f867194bfd9f2fa8 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Mon, 5 Nov 2018 18:10:44 +0000 Subject: [PATCH 5/7] Removing unused itertools --- scripts/validate_docstrings.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index b565114d30e7b..3521cc901085c 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -23,7 +23,6 @@ import pydoc import inspect import importlib -import itertools import doctest import tempfile From 2b6c61a295425c58d50e6a5eb7aa6c5a38d30a24 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Mon, 5 Nov 2018 22:30:20 +0000 Subject: [PATCH 6/7] Addressed comments from the reviews, and fixed and improved tests --- scripts/tests/test_validate_docstrings.py | 152 ++++++++++++++-------- scripts/validate_docstrings.py | 27 ++-- 2 files changed, 114 insertions(+), 65 deletions(-) diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index fba0910f1e080..cf8abd1680341 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -1,5 +1,4 @@ import io -import os import random import string import textwrap @@ -712,9 +711,9 @@ def test_bad_generic_functions(self, func): ('BadSummaries', 'no_capitalization', ('Summary must start with infinitive verb',)), ('BadSummaries', 'multi_line', - ('Summary should fit in a single line.',)), + ('Summary should fit in a single line',)), ('BadSummaries', 'two_paragraph_multi_line', - ('Summary should fit in a single line.',)), + ('Summary should fit in a single line',)), # Parameters tests ('BadParameters', 'missing_params', ('Parameters {**kwargs} not documented',)), @@ -753,27 +752,28 @@ def test_bad_generic_functions(self, func): marks=pytest.mark.xfail), # Examples tests ('BadGenericDocStrings', 'method', - ('numpy does not need to be imported in the examples',)), + ('Do not import numpy, as it is imported automatically',)), ('BadGenericDocStrings', 'method', - ('pandas does not need to be imported in the examples',)), + ('Do not import pandas, as it is imported automatically',)), # See Also tests ('BadSeeAlso', 'prefix_pandas', ('pandas.Series.rename in `See Also` section ' 'does not need `pandas` prefix',)), # Examples tests ('BadExamples', 'unused_import', - ('1 F401 \'pandas as pdf\' imported but unused',)), + ("flake8 error: F401 'pandas as pdf' imported but unused",)), ('BadExamples', 'indentation_is_not_a_multiple_of_four', - ('1 E111 indentation is not a multiple of four',)), + ('flake8 error: E111 indentation is not a multiple of four',)), ('BadExamples', 'missing_whitespace_around_arithmetic_operator', - ('1 E226 missing whitespace around arithmetic operator',)), + ('flake8 error: ' + 'E226 missing whitespace around arithmetic operator',)), ('BadExamples', 'missing_whitespace_after_comma', - ('3 E231 missing whitespace after \',\'',)), + ("flake8 error: E231 missing whitespace after ',' (3 times)",)), ]) def test_bad_examples(self, capsys, klass, func, msgs): result = validate_one(self._import_path(klass=klass, func=func)) for msg in msgs: - assert msg in ' '.join(result['errors']) + assert msg in ' '.join(err[1] for err in result['errors']) class ApiItems(object): @@ -852,45 +852,93 @@ def test_item_subsection(self, idx, subsection): assert result[idx][3] == subsection -def test_num_errors_for_validate_one(monkeypatch): - monkeypatch.setattr(validate_docstrings, 'validate_one', - lambda func_name: {'docstring': 'docstring1', - 'errors': ['err1', 'err2', 'err3'], - 'warnings': [], - 'examples_errors': ''}) - with open(os.devnull, 'w') as devnull: - num_errors = validate_docstrings.main('docstring1', devnull) - assert num_errors == 3 - - -def test_no_num_errors_for_validate_one(monkeypatch): - monkeypatch.setattr(validate_docstrings, 'validate_one', - lambda func_name: {'docstring': 'docstring1', - 'errors': [], - 'warnings': ['warn1'], - 'examples_errors': ''}) - with open(os.devnull, 'w') as devnull: - num_errors = validate_docstrings.main('docstring1', devnull) - assert num_errors == 0 - - -def test_num_errors_for_validate_all(monkeypatch): - monkeypatch.setattr(validate_docstrings, 'validate_all', - lambda: {'docstring1': {'errors': ['err1', - 'err2', - 'err3']}, - 'docstring2': {'errors': ['err4', - 'err5']}}) - with open(os.devnull, 'w') as devnull: - num_errors = validate_docstrings.main(None, devnull) - assert num_errors == 5 - - -def test_no_num_errors_for_validate_all(monkeypatch): - monkeypatch.setattr(validate_docstrings, 'validate_all', - lambda: {'docstring1': {'errors': [], - 'warnings': ['warn1']}, - 'docstring2': {'errors': []}}) - with open(os.devnull, 'w') as devnull: - num_errors = validate_docstrings.main(None, devnull) - assert num_errors == 0 +class MainFunction(object): + def test_num_errors_for_validate_one(self, monkeypatch): + monkeypatch.setattr( + validate_docstrings, 'validate_one', + lambda func_name: {'docstring': 'docstring1', + 'errors': [('ER01', 'err desc'), + ('ER02', 'err desc') + ('ER03', 'err desc')], + 'warnings': [], + 'examples_errors': ''}) + num_errors = validate_docstrings.main(func_name='docstring1', + prefix=None, + errors=[], + output_format='default') + assert num_errors == 3 + + def test_no_num_errors_for_validate_one(self, monkeypatch): + monkeypatch.setattr( + validate_docstrings, 'validate_one', + lambda func_name: {'docstring': 'docstring1', + 'errors': [], + 'warnings': [('WN01', 'warn desc')], + 'examples_errors': ''}) + num_errors = validate_docstrings.main(func_name='docstring1', + prefix=None, + errors=[], + output_format='default') + assert num_errors == 0 + + def test_num_errors_for_validate_all(self, monkeypatch): + monkeypatch.setattr( + validate_docstrings, 'validate_all', + lambda: {'docstring1': {'errors': [('ER01', 'err desc'), + ('ER02', 'err desc'), + ('ER03', 'err desc')]}, + 'docstring2': {'errors': [('ER04', 'err desc'), + ('ER05', 'err desc')]}}) + num_errors = validate_docstrings.main(func_name=None, + prefix=None, + errors=[], + output_format='default') + assert num_errors == 5 + + def test_no_num_errors_for_validate_all(self, monkeypatch): + monkeypatch.setattr( + validate_docstrings, 'validate_all', + lambda: {'docstring1': {'errors': [], + 'warnings': [('WN01', 'warn desc')]}, + 'docstring2': {'errors': []}}) + num_errors = validate_docstrings.main(func_name=None, + prefix=None, + errors=[], + output_format='default') + assert num_errors == 0 + + def test_prefix_param_filters_docstrings(self, monkeypatch): + monkeypatch.setattr( + validate_docstrings, 'validate_all', + lambda: {'Series.foo': {'errors': [('ER01', 'err desc'), + ('ER02', 'err desc'), + ('ER03', 'err desc')]}, + 'DataFrame.bar': {'errors': [('ER04', 'err desc'), + ('ER05', 'err desc')]}, + 'Series.foobar': {'errors': [('ER06', 'err desc')]}}) + num_errors = validate_docstrings.main(func_name=None, + prefix='Series.', + errors=[], + output_format='default') + assert num_errors == 4 + + def test_errors_param_filters_errors(self, monkeypatch): + monkeypatch.setattr( + validate_docstrings, 'validate_all', + lambda: {'Series.foo': {'errors': [('ER01', 'err desc'), + ('ER02', 'err desc'), + ('ER03', 'err desc')]}, + 'DataFrame.bar': {'errors': [('ER01', 'err desc'), + ('ER02', 'err desc')]}, + 'Series.foobar': {'errors': [('ER01', 'err desc')]}}) + num_errors = validate_docstrings.main(func_name=None, + prefix=None, + errors=['E01'], + output_format='default') + assert num_errors == 3 + + num_errors = validate_docstrings.main(func_name=None, + prefix=None, + errors=['E03'], + output_format='default') + assert num_errors == 1 diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 3521cc901085c..91902e93a4512 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -453,7 +453,7 @@ def validate_one(func_name): Notes ----- - The errors are codified in the next way: + The errors codes are defined as: - First two characters: Section where the error happens: * GL: Global (no section, like section ordering errors) * SS: Short summary @@ -469,9 +469,12 @@ def validate_one(func_name): * EX: Examples - Last two characters: Numeric error code inside the section - For example, RT02 is the second codified error in the Returns section. + For example, EX02 is the second codified error in the Examples section + (which in this case is assigned to examples that do not pass the tests). - The exact codes are available in the source code of this function. + The error codes, their corresponding error messages, and the details on how + they are validated, are not documented more than in the source code of this + function. """ doc = Docstring(func_name) @@ -598,7 +601,7 @@ def validate_one(func_name): examples_source_code = ''.join(doc.examples_source_code) for wrong_import in ('import numpy', 'import pandas'): if wrong_import in examples_source_code: - errs.append(('EX04', "Do not {}, as it's imported " + errs.append(('EX04', "Do not {}, as it is imported " 'automatically for the examples (numpy as np, ' 'pandas as pd)'.format(wrong_import))) @@ -620,9 +623,9 @@ def validate_all(prefix): Parameters ---------- - prefix : str, optional + prefix : str or None If provided, only the docstrings that start with this pattern will be - validated. + validated. If None, all docstrings will be validated. Returns ------- @@ -760,9 +763,9 @@ def header(title, width=80, char='#'): nargs='?', default=None, help=func_help) - argparser.add_argument('--format', default='default', help='format of the ' - 'output when validating multiple docstrings ' - '(ignored when validating one). ' + argparser.add_argument('--format', default='default', choices=format_opts, + help='format of the output when validating ' + 'multiple docstrings (ignored when validating one).' 'It can be {}'.format(str(format_opts)[1:-1])) argparser.add_argument('--prefix', default=None, help='pattern for the ' 'docstring names, in order to decide which ones ' @@ -776,8 +779,6 @@ def header(title, width=80, char='#'): 'a single docstring)') args = argparser.parse_args() - if args.format not in format_opts: - raise ValueError('--format argument must be one of {}'.format( - str(format_opts)[1:-1])) - sys.exit(main(args.function, args.prefix, args.errors.split(','), + sys.exit(main(args.function, args.prefix, + args.errors.split(',') if args.errors else None, args.format)) From f0ab4c3d2e4558a09fca22a7a7b257df5869dfe5 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 6 Nov 2018 10:23:45 +0000 Subject: [PATCH 7/7] Moving all error descriptions to a dictionary --- scripts/validate_docstrings.py | 201 ++++++++++++++++++++------------- 1 file changed, 125 insertions(+), 76 deletions(-) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 91902e93a4512..08fd3a4ce54d4 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -47,6 +47,83 @@ PRIVATE_CLASSES = ['NDFrame', 'IndexOpsMixin'] DIRECTIVES = ['versionadded', 'versionchanged', 'deprecated'] +ERROR_MSGS = { + 'GL01': 'Docstring text (summary) should start in the line immediately ' + 'after the opening quotes (not in the same line, or leaving a ' + 'blank line in between)', + 'GL02': 'Closing quotes should be placed in the line after the last text ' + 'in the docstring (do not close the quotes in the same line as ' + 'the text, or leave a blank line between the last text and the ' + 'quotes)', + 'GL03': 'Use only one blank line to separate sections or paragraphs', + 'GL04': 'Private classes ({mentioned_private_classes}) should not be ' + 'mentioned in public docstring', + 'GL05': 'Tabs found at the start of line "{line_with_tabs}", please use ' + 'whitespace only', + 'SS01': 'No summary found (a short summary in a single line should be ' + 'present at the beginning of the docstring)', + 'SS02': 'Summary does not start with a capital letter', + 'SS03': 'Summary does not end with a period', + 'SS04': 'Summary contains heading whitespaces', + 'SS05': 'Summary must start with infinitive verb, not third person ' + '(e.g. use "Generate" instead of "Generates")', + 'SS06': 'Summary should fit in a single line', + 'ES01': 'No extended summary found', + 'PR01': 'Parameters {missing_params} not documented', + 'PR02': 'Unknown parameters {unknown_params}', + 'PR03': 'Wrong parameters order. Actual: {actual_params}. ' + 'Documented: {documented_params}', + 'PR04': 'Parameter "{param_name}" has no type', + 'PR05': 'Parameter "{param_name}" type should not finish with "."', + 'PR06': 'Parameter "{param_name}" type should use "{right_type}" instead ' + 'of "{wrong_type}"', + 'PR07': 'Parameter "{param_name}" has no description', + 'PR08': 'Parameter "{param_name}" description should start with a ' + 'capital letter', + 'PR09': 'Parameter "{param_name}" description should finish with "."', + 'RT01': 'No Returns section found', + 'YD01': 'No Yields section found', + 'SA01': 'See Also section not found', + 'SA02': 'Missing period at end of description for See Also ' + '"{reference_name}" reference', + 'SA03': 'Description should be capitalized for See Also ' + '"{reference_name}" reference', + 'SA04': 'Missing description for See Also "{reference_name}" reference', + 'SA05': '{reference_name} in `See Also` section does not need `pandas` ' + 'prefix, use {right_reference} instead.', + 'EX01': 'No examples section found', + 'EX02': 'Examples do not pass tests:\n{doctest_log}', + 'EX03': 'flake8 error: {error_code} {error_message}{times_happening}', + 'EX04': 'Do not import {imported_library}, as it is imported ' + 'automatically for the examples (numpy as np, pandas as pd)', +} + + +def error(code, **kwargs): + """ + Return a tuple with the error code and the message with variables replaced. + + This is syntactic sugar so instead of: + - `('EX02', ERROR_MSGS['EX02'].format(doctest_log=log))` + + We can simply use: + - `error('EX02', doctest_log=log)` + + Parameters + ---------- + code : str + Error code. + **kwargs + Values for the variables in the error messages + + Returns + ------- + code : str + Error code. + message : str + Error message with varaibles replaced. + """ + return (code, ERROR_MSGS[code].format(**kwargs)) def get_api_items(api_doc_fd): @@ -322,18 +399,15 @@ def parameter_mismatches(self): doc_params = tuple(self.doc_parameters) missing = set(signature_params) - set(doc_params) if missing: - errs.append(('PR01', - 'Parameters {} not documented'.format( - pprint_thing(missing)))) + errs.append(error('PR01', missing_params=pprint_thing(missing))) extra = set(doc_params) - set(signature_params) if extra: - errs.append(('PR02', 'Unknown parameters {}'.format( - pprint_thing(extra)))) + errs.append(error('PR02', unknown_params=pprint_thing(extra))) if (not missing and not extra and signature_params != doc_params and not (not signature_params and not doc_params)): - errs.append(('PR03', 'Wrong parameters order. ' + - 'Actual: {!r}. '.format(signature_params) + - 'Documented: {!r}'.format(doc_params))) + errs.append(error('PR03', + actual_params=signature_params, + documented_params=doc_params)) return errs @@ -481,45 +555,32 @@ def validate_one(func_name): errs = [] wrns = [] if doc.start_blank_lines != 1: - errs.append(('GL01', 'Docstring text (summary) should start in the ' - 'line immediately after the opening quotes (not in the ' - 'same line, or leaving a blank line in between)')) + errs.append(error('GL01')) if doc.end_blank_lines != 1: - errs.append(('GL02', 'Closing quotes should be placed in the line ' - 'after the last text in the docstring (do not close the ' - 'quotes in the same line as the text, or leave a ' - 'blank line between the last text and the quotes)')) + errs.append(error('GL02')) if doc.double_blank_lines: - errs.append(('GL03', 'Use only one blank line to separate sections or ' - 'paragraphs')) + errs.append(error('GL03')) mentioned_errs = doc.mentioned_private_classes if mentioned_errs: - errs.append(('GL04', 'Private classes ({}) should not be mentioned in ' - 'public docstring.'.format(mentioned_errs))) + errs.append(error('GL04'), mentioned_private_classes=mentioned_errs) for line in doc.raw_doc.splitlines(): if re.match("^ *\t", line): - errs.append(('GL05', 'Tabs found at the start of line "{}", ' - 'please use whitespace only'.format(line.lstrip()))) + errs.append(error('GL05', line_with_tabs=line.lstrip())) if not doc.summary: - errs.append(('SS01', 'No summary found (a short summary in a single ' - 'line should be present at the beginning of the ' - 'docstring)')) + errs.append(error('SS01')) else: if not doc.summary[0].isupper(): - errs.append(('SS02', - 'Summary does not start with a capital letter')) + errs.append(error('SS02')) if doc.summary[-1] != '.': - errs.append(('SS03', 'Summary does not end with a period')) + errs.append(error('SS03')) if doc.summary != doc.summary.lstrip(): - errs.append(('SS04', 'Summary contains heading whitespaces')) + errs.append(error('SS04')) elif (doc.is_function_or_method and doc.summary.split(' ')[0][-1] == 's'): - errs.append(('SS05', 'Summary must start with infinitive verb, ' - 'not third person (e.g. use "Generate" instead of ' - '"Generates")')) + errs.append(error('SS05')) if doc.num_summary_lines > 1: - errs.append(('SS06', "Summary should fit in a single line")) + errs.append(error('SS06')) if not doc.extended_summary: wrns.append(('ES01', 'No extended summary found')) @@ -532,78 +593,66 @@ def validate_one(func_name): for param in doc.doc_parameters: if not param.startswith("*"): # Check can ignore var / kwargs if not doc.parameter_type(param): - errs.append(('PR04', - 'Parameter "{}" has no type'.format(param))) + errs.append(error('PR04', param_name=param)) else: if doc.parameter_type(param)[-1] == '.': - errs.append(('PR05', 'Parameter "{}" type should ' - 'not finish with "."'.format(param))) + errs.append(error('PR05', param_name=param)) common_type_errors = [('integer', 'int'), ('boolean', 'bool'), ('string', 'str')] - for incorrect_type, correct_type in common_type_errors: - if incorrect_type in doc.parameter_type(param): - errs.append(('PR06', 'Parameter "{}" type ' - 'should use "{}" instead of "{}"' - .format(param, - correct_type, - incorrect_type))) + for wrong_type, right_type in common_type_errors: + if wrong_type in doc.parameter_type(param): + errs.append(error('PR06', + param_name=param, + right_type=right_type, + wrong_type=wrong_type)) if not doc.parameter_desc(param): - errs.append(('PR07', 'Parameter "{}" ' - 'has no description'.format(param))) + errs.append(error('PR07', param_name=param)) else: if not doc.parameter_desc(param)[0].isupper(): - errs.append(('PR08', 'Parameter "{}" description should start ' - 'with a capital letter'.format(param))) + errs.append(error('PR08', param_name=param)) if doc.parameter_desc(param)[-1] != '.': - errs.append(('PR09', 'Parameter "{}" description ' - 'should finish with "."'.format(param))) + errs.append(error('PR09', param_name=param)) if doc.is_function_or_method: - if not doc.returns and "return" in doc.method_source: - errs.append(('RT01', 'No Returns section found')) - if not doc.yields and "yield" in doc.method_source: - errs.append(('YD01', 'No Yields section found')) + if not doc.returns and 'return' in doc.method_source: + errs.append(error('RT01')) + if not doc.yields and 'yield' in doc.method_source: + errs.append(error('YD01')) if not doc.see_also: - wrns.append(('SA01', 'See Also section not found')) + wrns.append(error('SA01')) else: for rel_name, rel_desc in doc.see_also.items(): if rel_desc: if not rel_desc.endswith('.'): - errs.append(('SA02', 'Missing period at end of ' - 'description for See Also "{}" ' - 'reference'.format(rel_name))) + errs.append(error('SA02', reference_name=rel_name)) if not rel_desc[0].isupper(): - errs.append(('SA03', - 'Description should be capitalized for See ' - 'Also "{}" reference'.format(rel_name))) + errs.append(error('SA03', reference_name=rel_name)) else: - errs.append(('SA04', 'Missing description for ' - 'See Also "{}" reference'.format(rel_name))) + errs.append(error('SA04', reference_name=rel_name)) if rel_name.startswith('pandas.'): - errs.append(('SA05', '{} in `See Also` section does not ' - 'need `pandas` prefix, use {} instead.' - .format(rel_name, rel_name[len('pandas.'):]))) + errs.append(error('SA05', + reference_name=rel_name, + right_reference=rel_name[len('pandas.'):])) examples_errs = '' if not doc.examples: - wrns.append(('EX01', 'No examples section found')) + wrns.append(error('EX01')) else: examples_errs = doc.examples_errors if examples_errs: - errs.append(('EX02', 'Examples do not pass tests:' - '\n{}'.format(examples_errs))) + errs.append(error('EX02', doctest_log=examples_errs)) for err in doc.validate_pep8(): - errs.append(('EX03', 'flake8 error: {} {}{}'.format( - err.error_code, err.message, - ' ({} times)'.format(err.count) if err.count > 1 else ''))) + errs.append(error('EX03', + error_code=err.error_code, + error_message=err.message, + times_happening=' ({} times)'.format(err.count) + if err.count > 1 else '')) examples_source_code = ''.join(doc.examples_source_code) - for wrong_import in ('import numpy', 'import pandas'): - if wrong_import in examples_source_code: - errs.append(('EX04', "Do not {}, as it is imported " - 'automatically for the examples (numpy as np, ' - 'pandas as pd)'.format(wrong_import))) + for wrong_import in ('numpy', 'pandas'): + if 'import {}'.format(wrong_import) in examples_source_code: + errs.append(error('EX04', imported_library=wrong_import)) return {'type': doc.type, 'docstring': doc.clean_doc,