From 4fa85c69885509b8903f8f26f38af95827a9626d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 2 Jul 2020 12:18:32 -0700 Subject: [PATCH 01/12] Added notes on debugging extensions --- .../development/debugging_extensions.rst | 37 +++++++++++++++++++ doc/source/development/index.rst | 1 + setup.py | 20 ++++++---- 3 files changed, 50 insertions(+), 8 deletions(-) create mode 100644 doc/source/development/debugging_extensions.rst diff --git a/doc/source/development/debugging_extensions.rst b/doc/source/development/debugging_extensions.rst new file mode 100644 index 0000000000000..904cabd66d239 --- /dev/null +++ b/doc/source/development/debugging_extensions.rst @@ -0,0 +1,37 @@ +.. _debugging_c_extensions: + +{{ header }} + +********************** +Debugging C Extensions +********************** + +Pandas uses select C extensions for high performance IO operations. In case you need to debug segfaults or general issues with those extensions, the following steps may be helpful. These steps are geared towards using lldb as a debugger, though the steps for gdb will be similar. + +First, be sure to compile the extensions with the appropriate flags to generate debug symbols and remove optimizations. This can be achieved as follows: + +.. code-block:: sh + + python setup.py build_ext --inplace -j4 --with-debugging-symbols + +Next you can create a script that hits the extension module you are looking to debug and place it in the project root. Thereafter launch a Python process under lldb: + +.. code-block:: sh + + lldb run python + +If desired, set breakpoints at various file locations using the below syntax: + +.. code-block:: sh + + breakpoint set --file pandas/_libs/src/ujson/python/objToJSON.c --line 1547 + +At this point you may get *WARNING: Unable to resolve breakpoint to any actual locations.*. If you have not yet executed anything it is possible that this module has not been loaded into memory, which is why the location cannot be resolved. 
You can simply ignore for now as it will bind when we actually execute code. + +Finally go ahead and execute your script: + +.. code-block:: sh + + run .py + +Code execution will halt at the breakpoint defined or at the occurance of any segfault. LLDB's `GDB to LLDB command map `_ provides a listing of debugger command that you can execute using either debugger. diff --git a/doc/source/development/index.rst b/doc/source/development/index.rst index f8a6bb6deb52d..2cb7f3a9fc6ef 100644 --- a/doc/source/development/index.rst +++ b/doc/source/development/index.rst @@ -16,6 +16,7 @@ Development code_style maintaining internals + debugging_extensions extending developer policies diff --git a/setup.py b/setup.py index e9d305d831653..898f81cc6cf5e 100755 --- a/setup.py +++ b/setup.py @@ -414,18 +414,16 @@ def run(self): # ---------------------------------------------------------------------- # Preparation of compiler arguments - -debugging_symbols_requested = "--with-debugging-symbols" in sys.argv -if debugging_symbols_requested: - sys.argv.remove("--with-debugging-symbols") - - if sys.byteorder == "big": endian_macro = [("__BIG_ENDIAN__", "1")] else: endian_macro = [("__LITTLE_ENDIAN__", "1")] +debugging_symbols_requested = "--with-debugging-symbols" in sys.argv +if debugging_symbols_requested: + sys.argv.remove("--with-debugging-symbols") + if is_platform_windows(): extra_compile_args = [] extra_link_args = [] @@ -435,8 +433,14 @@ def run(self): else: extra_compile_args = ["-Werror"] extra_link_args = [] - if debugging_symbols_requested: - extra_compile_args.append("-g") + if not debugging_symbols_requested: + # Strip debugging symbols (included by default) + extra_compile_args.append("-g0") + else: + # TODO: these should override the defaults provided by Python + # by being appended to end, but would ideally replace altogether + extra_compile_args.append("-UNDEBUG") + extra_compile_args.append("-O0") # Build for at least macOS 10.9 when compiling on a 10.9 system or 
above, # overriding CPython distuitls behaviour which is to target the version that From a792267b329c8a26a12c5faabacdb9e317cafbf0 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 2 Jul 2020 13:15:35 -0700 Subject: [PATCH 02/12] capitalization --- doc/source/development/debugging_extensions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/development/debugging_extensions.rst b/doc/source/development/debugging_extensions.rst index 904cabd66d239..f69542e3ae3c2 100644 --- a/doc/source/development/debugging_extensions.rst +++ b/doc/source/development/debugging_extensions.rst @@ -3,7 +3,7 @@ {{ header }} ********************** -Debugging C Extensions +Debugging C extensions ********************** Pandas uses select C extensions for high performance IO operations. In case you need to debug segfaults or general issues with those extensions, the following steps may be helpful. These steps are geared towards using lldb as a debugger, though the steps for gdb will be similar. From 61654dd333ce2a8f9ed621877c6c306fffc4ca90 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Mon, 6 Jul 2020 15:49:13 -0700 Subject: [PATCH 03/12] Update debugging_extensions.rst --- doc/source/development/debugging_extensions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/development/debugging_extensions.rst b/doc/source/development/debugging_extensions.rst index f69542e3ae3c2..6a007c4f05a89 100644 --- a/doc/source/development/debugging_extensions.rst +++ b/doc/source/development/debugging_extensions.rst @@ -18,7 +18,7 @@ Next you can create a script that hits the extension module you are looking to d .. 
code-block:: sh - lldb run python + lldb python If desired, set breakpoints at various file locations using the below syntax: From aa8ad1fa07c52f67af4cc4260f901e2d176d018e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 6 Jul 2020 16:40:59 -0700 Subject: [PATCH 04/12] Added notes on test suite --- doc/source/development/debugging_extensions.rst | 14 ++++++++++++++ foo.py | 9 +++++++++ 2 files changed, 23 insertions(+) create mode 100644 foo.py diff --git a/doc/source/development/debugging_extensions.rst b/doc/source/development/debugging_extensions.rst index 6a007c4f05a89..44c77640386a2 100644 --- a/doc/source/development/debugging_extensions.rst +++ b/doc/source/development/debugging_extensions.rst @@ -35,3 +35,17 @@ Finally go ahead and execute your script: run .py Code execution will halt at the breakpoint defined or at the occurance of any segfault. LLDB's `GDB to LLDB command map `_ provides a listing of debugger command that you can execute using either debugger. + +Another option to execute the entire test suite under the debugger would be to run the following: + +.. code-block:: sh + + lldb -- python -m pytest + +Or for gdb + +.. code-block:: sh + + gdb --args python -m pytest + +Once the process launches, simply type ``run`` and the test suite will begin, stopping at any segmentation fault that may occur. 
diff --git a/foo.py b/foo.py new file mode 100644 index 0000000000000..0af2630403ab6 --- /dev/null +++ b/foo.py @@ -0,0 +1,9 @@ +import pandas as pd + +# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_json.html +#df = pd.DataFrame([['a', 'b'], ['c', 'd']],index=['row 1', 'row 2'],columns=['col 1', 'col 2']) + +df = pd.DataFrame([["a", "b"], ["c", "d"]], columns=["x", "x"]) +print(df.to_json(orient="split")) + + From ca29cfd2594fe65ad03c7c7ac5b4e5bf4e6600d9 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 6 Jul 2020 16:42:28 -0700 Subject: [PATCH 05/12] deleted errant file --- foo.py | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 foo.py diff --git a/foo.py b/foo.py deleted file mode 100644 index 0af2630403ab6..0000000000000 --- a/foo.py +++ /dev/null @@ -1,9 +0,0 @@ -import pandas as pd - -# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_json.html -#df = pd.DataFrame([['a', 'b'], ['c', 'd']],index=['row 1', 'row 2'],columns=['col 1', 'col 2']) - -df = pd.DataFrame([["a", "b"], ["c", "d"]], columns=["x", "x"]) -print(df.to_json(orient="split")) - - From e699c5b8e01a90962daf6f460f416dad250493f1 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 8 Jul 2020 08:41:07 -0700 Subject: [PATCH 06/12] xhochy feedback --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 898f81cc6cf5e..439de4d5cebfc 100755 --- a/setup.py +++ b/setup.py @@ -441,6 +441,7 @@ def run(self): # by being appended to end, but would ideally replace altogether extra_compile_args.append("-UNDEBUG") extra_compile_args.append("-O0") + extra_compile_args.append("-fno-omit-frame-pointer") # Build for at least macOS 10.9 when compiling on a 10.9 system or above, # overriding CPython distuitls behaviour which is to target the version that From 7166d52095d0602d91cb0bd3a30be7d12106af56 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 27 Jul 2020 11:01:24 -0700 Subject: [PATCH 07/12] valgrind 
notes --- .../development/debugging_extensions.rst | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/doc/source/development/debugging_extensions.rst b/doc/source/development/debugging_extensions.rst index 44c77640386a2..425d43e7b16df 100644 --- a/doc/source/development/debugging_extensions.rst +++ b/doc/source/development/debugging_extensions.rst @@ -2,9 +2,9 @@ {{ header }} -********************** +====================== Debugging C extensions -********************** +====================== Pandas uses select C extensions for high performance IO operations. In case you need to debug segfaults or general issues with those extensions, the following steps may be helpful. These steps are geared towards using lldb as a debugger, though the steps for gdb will be similar. @@ -14,7 +14,10 @@ First, be sure to compile the extensions with the appropriate flags to generate python setup.py build_ext --inplace -j4 --with-debugging-symbols -Next you can create a script that hits the extension module you are looking to debug and place it in the project root. Thereafter launch a Python process under lldb: +Using a debugger +================ + +You can create a script that hits the extension module you are looking to debug and place it in the project root. Thereafter launch a Python process under lldb: .. code-block:: sh @@ -49,3 +52,14 @@ Or for gdb gdb --args python -m pytest Once the process launches, simply type ``run`` and the test suite will begin, stopping at any segmentation fault that may occur. + +Checking memory leaks with valgrind +=================================== + +You can use `Valgrind <https://www.valgrind.org>`_ to check for and log memory leaks in extensions. For instance, to check for a memory leak in a test from the suite you can run: + +.. code-block:: sh + + PYTHONMALLOC=malloc valgrind --leak-check=yes --track-origins=yes --log-file=valgrind-log.txt python -m pytest + +Note that code execution under valgrind will take much longer than usual. 
While you can run valgrind against extensions compiled with any optimization level, it is suggested to have optimizations turned off from compiled extensions to reduce the amount of false positives. The ``--with-debugging-symbols`` flag passed during package setup will do this for you automatically. From 83762ba04736aed0bd2a1bba95cf9f5d300453a9 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 29 Jul 2020 14:40:46 -0700 Subject: [PATCH 08/12] extra note --- doc/source/development/debugging_extensions.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/source/development/debugging_extensions.rst b/doc/source/development/debugging_extensions.rst index 425d43e7b16df..f1ec5f2133262 100644 --- a/doc/source/development/debugging_extensions.rst +++ b/doc/source/development/debugging_extensions.rst @@ -63,3 +63,7 @@ You can use `Valgrind `_ to check for and log memory l PYTHONMALLOC=malloc valgrind --leak-check=yes --track-origins=yes --log-file=valgrind-log.txt python -m pytest Note that code execution under valgrind will take much longer than usual. While you can run valgrind against extensions compiled with any optimization level, it is suggested to have optimizations turned off from compiled extensions to reduce the amount of false positives. The ``--with-debugging-symbols`` flag passed during package setup will do this for you automatically. + +.. 
note:: + + For best results, you should use a Python installation configured with Valgrind support (--with-valgrind) From d80688cf9a32f085a2e6f5179fbae1e8487ecfac Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 8 Dec 2020 16:18:16 -0800 Subject: [PATCH 09/12] cleaned up spaces --- doc/source/development/index.rst | 2 +- setup.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/development/index.rst b/doc/source/development/index.rst index 9d0e99c519c03..abe2fc1409bfb 100644 --- a/doc/source/development/index.rst +++ b/doc/source/development/index.rst @@ -17,7 +17,7 @@ Development maintaining internals test_writing - debugging_extensions + debugging_extensions extending developer policies diff --git a/setup.py b/setup.py index b25d5c6552313..c6f62db774f47 100755 --- a/setup.py +++ b/setup.py @@ -425,7 +425,6 @@ def run(self): extra_compile_args.append("-g") extra_compile_args.append("-UNDEBUG") extra_compile_args.append("-O0") ->>>>>>> upstream/master # Build for at least macOS 10.9 when compiling on a 10.9 system or above, # overriding CPython distuitls behaviour which is to target the version that From 1c67b2d4da91351bf5b06e6330d5b792e3dd7784 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 8 Dec 2020 16:46:47 -0800 Subject: [PATCH 10/12] Steps for gdb and lldb --- .../development/debugging_extensions.rst | 38 +++++++++++++++---- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/doc/source/development/debugging_extensions.rst b/doc/source/development/debugging_extensions.rst index f1ec5f2133262..7723afba87649 100644 --- a/doc/source/development/debugging_extensions.rst +++ b/doc/source/development/debugging_extensions.rst @@ -6,7 +6,7 @@ Debugging C extensions ====================== -Pandas uses select C extensions for high performance IO operations. In case you need to debug segfaults or general issues with those extensions, the following steps may be helpful. 
These steps are geared towards using lldb as a debugger, though the steps for gdb will be similar. +Pandas uses select C extensions for high performance IO operations. In case you need to debug segfaults or general issues with those extensions, the following steps may be helpful. First, be sure to compile the extensions with the appropriate flags to generate debug symbols and remove optimizations. This can be achieved as follows: @@ -17,21 +17,45 @@ First, be sure to compile the extensions with the appropriate flags to generate Using a debugger ================ -You can create a script that hits the extension module you are looking to debug and place it in the project root. Thereafter launch a Python process under lldb: +Assuming you are on a Unix-like operating system, you can use either lldb or gdb to debug. The choice between either is largely dependent on your compilation toolchain - typically you would use lldb if using clang and gdb if using gcc. For macOS users, please note that ``gcc`` is on modern systems an alias for ``clang``, so if using Xcode you usually opt for lldb. Regardless of which debugger you choose, please refer to your operating systems instructions on how to install. + +After installing a debugger you can create a script that hits the extension module you are looking to debug. For demonstration purposes, let's assume you have a script called ``debug_testing.py`` with the following contents: + +.. code-block:: + + import pandas as pd + + pd.DataFrame([[1, 2]]).to_json() + +Place the ``debug_testing.py`` script in the project root and launch a Python process under your debugger. If using lldb: .. code-block:: sh lldb python -If desired, set breakpoints at various file locations using the below syntax: +If using gdb: .. code-block:: sh - breakpoint set --file pandas/_libs/src/ujson/python/objToJSON.c --line 1547 + gdb python + +Before executing our script, let's set a breakpoint in our JSON serializer in its entry function called ``objToJSON``. 
The lldb syntax would look as follows: + +.. code-block:: sh + + breakpoint set --name objToJSON + +Similarly for gdb: + +.. code-block:: sh + + break objToJSON + +.. note:: -At this point you may get *WARNING: Unable to resolve breakpoint to any actual locations.*. If you have not yet executed anything it is possible that this module has not been loaded into memory, which is why the location cannot be resolved. You can simply ignore for now as it will bind when we actually execute code. + You may get a warning that this breakpoint cannot be resolved in lldb. gdb may give a similar warning and prompt you to make the breakpoint on a future library load, which you should say yes to. This should only happen on the very first invocation as the module you wish to debug has not yet been loaded into memory. -Finally go ahead and execute your script: +Now go ahead and execute your script: .. code-block:: sh @@ -39,7 +63,7 @@ Finally go ahead and execute your script: Code execution will halt at the breakpoint defined or at the occurance of any segfault. LLDB's `GDB to LLDB command map `_ provides a listing of debugger command that you can execute using either debugger. -Another option to execute the entire test suite under the debugger would be to run the following: +Another option to execute the entire test suite under lldb would be to run the following: .. 
code-block:: sh From aa324e5bfe0f28b88da19031df80f10401e29dba Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 8 Dec 2020 16:48:34 -0800 Subject: [PATCH 11/12] Added python directive to code-block --- doc/source/development/debugging_extensions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/development/debugging_extensions.rst b/doc/source/development/debugging_extensions.rst index 7723afba87649..358c4036df961 100644 --- a/doc/source/development/debugging_extensions.rst +++ b/doc/source/development/debugging_extensions.rst @@ -21,7 +21,7 @@ Assuming you are on a Unix-like operating system, you can use either lldb or gdb After installing a debugger you can create a script that hits the extension module you are looking to debug. For demonstration purposes, let's assume you have a script called ``debug_testing.py`` with the following contents: -.. code-block:: +.. code-block:: python import pandas as pd From 3b8de2e4a686abcf553498bd51fc5176d292c382 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 8 Dec 2020 16:49:19 -0800 Subject: [PATCH 12/12] removed extra newlines --- setup.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/setup.py b/setup.py index c6f62db774f47..a25fe95e025b3 100755 --- a/setup.py +++ b/setup.py @@ -403,14 +403,12 @@ def run(self): sys.argv.remove("--with-debugging-symbols") - if sys.byteorder == "big": endian_macro = [("__BIG_ENDIAN__", "1")] else: endian_macro = [("__LITTLE_ENDIAN__", "1")] - extra_compile_args = [] extra_link_args = [] if is_platform_windows():