Skip to content

Commit

Permalink
BUG: Improved thread safety for read_html() GH16928
Browse files Browse the repository at this point in the history
  • Loading branch information
3553x committed Jul 21, 2017
1 parent 7ffe7fc commit c01caaf
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 3 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ I/O

- Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`)

- Bug in :func:`read_html` where the import check failed when run from multiple threads (:issue:`16928`)

Plotting
^^^^^^^^
- Bug in plotting methods using ``secondary_y`` and ``fontsize`` not setting secondary axis font size (:issue:`12565`)
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,6 @@ def _importers():
if _IMPORTS:
return

_IMPORTS = True

global _HAS_BS4, _HAS_LXML, _HAS_HTML5LIB

try:
Expand All @@ -59,6 +57,8 @@ def _importers():
except ImportError:
pass

_IMPORTS = True


#############
# READ HTML #
Expand Down
36 changes: 35 additions & 1 deletion pandas/tests/io/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,17 @@
import glob
import os
import re
import threading
import warnings


# imports needed for Python 3.x but will fail under Python 2.x
try:
from importlib import import_module
from importlib import import_module, reload
except ImportError:
import_module = __import__


from distutils.version import LooseVersion

import pytest
Expand All @@ -22,6 +26,7 @@
from pandas.compat import (map, zip, StringIO, string_types, BytesIO,
is_platform_windows, PY3)
from pandas.io.common import URLError, urlopen, file_path_to_url
import pandas.io.html
from pandas.io.html import read_html
from pandas._libs.parsers import ParserError

Expand Down Expand Up @@ -931,3 +936,32 @@ def test_same_ordering():
dfs_lxml = read_html(filename, index_col=0, flavor=['lxml'])
dfs_bs4 = read_html(filename, index_col=0, flavor=['bs4'])
assert_framelist_equal(dfs_lxml, dfs_bs4)


class ErrorThread(threading.Thread):
    """Thread that records an exception raised while running its target.

    Instead of letting an ``Exception`` escape from ``run``, it is stored
    on the ``err`` attribute. ``err`` is ``None`` when ``run`` completed
    without raising (and also before ``run`` has finished).
    """

    # Class-level default so ``err`` can be read safely even if the thread
    # has not been started or has not finished yet.
    err = None

    def run(self):
        """Run the thread's target, capturing any ``Exception`` in ``err``."""
        try:
            super(ErrorThread, self).run()
        except Exception as e:
            self.err = e
        else:
            self.err = None


@pytest.mark.slow
def test_importcheck_thread_safety():
    """Run ``read_html`` from two threads at once and check neither errors.

    See gh-16928.
    """
    # force import check by reinitialising global vars in html.py
    reload(pandas.io.html)

    filename = os.path.join(DATA_PATH, 'valid_markup.html')
    helper_thread1 = ErrorThread(target=read_html, args=(filename,))
    helper_thread2 = ErrorThread(target=read_html, args=(filename,))

    helper_thread1.start()
    helper_thread2.start()

    # join() blocks until each thread has finished, instead of spinning
    # the CPU in a busy-wait loop on is_alive().
    helper_thread1.join()
    helper_thread2.join()
    assert None is helper_thread1.err is helper_thread2.err

0 comments on commit c01caaf

Please sign in to comment.