diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 4c1fd20ea921b8..a5d32a04fc2d7d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.0.288
+    rev: v0.0.292
     hooks:
       - id: ruff
         name: Run Ruff on Lib/test/
diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst
index 60f5c81cff572c..d164d1a752e295 100644
--- a/Doc/c-api/init.rst
+++ b/Doc/c-api/init.rst
@@ -870,6 +870,19 @@ code, or when embedding the Python interpreter:
    When the current thread state is ``NULL``, this issues a fatal error (so that
    the caller needn't check for ``NULL``).
 
+   See also :c:func:`PyThreadState_GetUnchecked`.
+
+
+.. c:function:: PyThreadState* PyThreadState_GetUnchecked()
+
+   Similar to :c:func:`PyThreadState_Get`, but doesn't kill the process with a
+   fatal error if the current thread state is ``NULL``. The caller is
+   responsible for checking whether the result is ``NULL``.
+
+   .. versionadded:: 3.13
+      In Python 3.5 to 3.12, the function was private and known as
+      ``_PyThreadState_UncheckedGet()``.
+
 
 .. c:function:: PyThreadState* PyThreadState_Swap(PyThreadState *tstate)
 
diff --git a/Doc/c-api/object.rst b/Doc/c-api/object.rst
index bf55b5788efa47..a4e3e74861a315 100644
--- a/Doc/c-api/object.rst
+++ b/Doc/c-api/object.rst
@@ -489,3 +489,21 @@ Object Protocol
    :c:macro:`Py_TPFLAGS_ITEMS_AT_END` set.
 
    .. versionadded:: 3.12
+
+.. c:function:: int PyObject_VisitManagedDict(PyObject *obj, visitproc visit, void *arg)
+
+   Visit the managed dictionary of *obj*.
+
+   This function must only be called in a traverse function of the type which
+   has the :c:macro:`Py_TPFLAGS_MANAGED_DICT` flag set.
+
+   .. versionadded:: 3.13
+
+.. c:function:: void PyObject_ClearManagedDict(PyObject *obj)
+
+   Clear the managed dictionary of *obj*.
+
+   This function must only be called in a clear function of the type which
+   has the :c:macro:`Py_TPFLAGS_MANAGED_DICT` flag set.
+
+   .. versionadded:: 3.13
diff --git a/Doc/c-api/typeobj.rst b/Doc/c-api/typeobj.rst
index 1fa3f2a6f53735..10c05beda7c66f 100644
--- a/Doc/c-api/typeobj.rst
+++ b/Doc/c-api/typeobj.rst
@@ -1131,6 +1131,9 @@ and :c:data:`PyType_Type` effectively act as defaults.)
 
       If this flag is set, :c:macro:`Py_TPFLAGS_HAVE_GC` should also be set.
 
+      The type traverse function must call :c:func:`PyObject_VisitManagedDict`
+      and its clear function must call :c:func:`PyObject_ClearManagedDict`.
+
       .. versionadded:: 3.12
 
       **Inheritance:**
@@ -1368,6 +1371,23 @@ and :c:data:`PyType_Type` effectively act as defaults.)
    debugging aid you may want to visit it anyway just so the :mod:`gc` module's
    :func:`~gc.get_referents` function will include it.
 
+   Heap types (:c:macro:`Py_TPFLAGS_HEAPTYPE`) must visit their type with::
+
+       Py_VISIT(Py_TYPE(self));
+
+   It is only needed since Python 3.9. To support Python 3.8 and older, this
+   line must be conditional::
+
+       #if PY_VERSION_HEX >= 0x03090000
+           Py_VISIT(Py_TYPE(self));
+       #endif
+
+   If the :c:macro:`Py_TPFLAGS_MANAGED_DICT` bit is set in the
+   :c:member:`~PyTypeObject.tp_flags` field, the traverse function must call
+   :c:func:`PyObject_VisitManagedDict` like this::
+
+       PyObject_VisitManagedDict((PyObject*)self, visit, arg);
+
    .. warning::
        When implementing :c:member:`~PyTypeObject.tp_traverse`, only the
        members that the instance *owns* (by having :term:`strong references
@@ -1451,6 +1471,12 @@ and :c:data:`PyType_Type` effectively act as defaults.)
    so that *self* knows the contained object can no longer be used.  The
    :c:func:`Py_CLEAR` macro performs the operations in a safe order.
 
+   If the :c:macro:`Py_TPFLAGS_MANAGED_DICT` bit is set in the
+   :c:member:`~PyTypeObject.tp_flags` field, the clear function must call
+   :c:func:`PyObject_ClearManagedDict` like this::
+
+       PyObject_ClearManagedDict((PyObject*)self);
+
    Note that :c:member:`~PyTypeObject.tp_clear` is not *always* called
    before an instance is deallocated. For example, when reference counting
    is enough to determine that an object is no longer used, the cyclic garbage
@@ -1801,7 +1827,7 @@ and :c:data:`PyType_Type` effectively act as defaults.)
    field is ``NULL`` then no :attr:`~object.__dict__` gets created for instances.
 
    If the :c:macro:`Py_TPFLAGS_MANAGED_DICT` bit is set in the
-   :c:member:`~PyTypeObject.tp_dict` field, then
+   :c:member:`~PyTypeObject.tp_flags` field, then
    :c:member:`~PyTypeObject.tp_dictoffset` will be set to ``-1``, to indicate
    that it is unsafe to use this field.
 
diff --git a/Doc/constraints.txt b/Doc/constraints.txt
index 54888eaab242ee..147de1271eb2b7 100644
--- a/Doc/constraints.txt
+++ b/Doc/constraints.txt
@@ -10,8 +10,7 @@ colorama<0.5
 imagesize<1.5
 Jinja2<3.2
 packaging<24
-# Pygments==2.15.0 breaks CI
-Pygments<2.16,!=2.15.0
+Pygments>=2.16.1,<3
 requests<3
 snowballstemmer<3
 sphinxcontrib-applehelp<1.1
diff --git a/Doc/library/__main__.rst b/Doc/library/__main__.rst
index fd60d92d4eb0f9..d378e40b3906c6 100644
--- a/Doc/library/__main__.rst
+++ b/Doc/library/__main__.rst
@@ -238,9 +238,9 @@ package.  For more details, see :ref:`intra-package-references` in the
 Idiomatic Usage
 ^^^^^^^^^^^^^^^
 
-The contents of ``__main__.py`` typically isn't fenced with
-``if __name__ == '__main__'`` blocks.  Instead, those files are kept short,
-functions to execute from other modules.  Those other modules can then be
+The content of ``__main__.py`` typically isn't fenced with an
+``if __name__ == '__main__'`` block.  Instead, those files are kept
+short and import functions to execute from other modules.  Those other modules can then be
 easily unit-tested and are properly reusable.
 
 If used, an ``if __name__ == '__main__'`` block will still work as expected
diff --git a/Doc/library/ast.rst b/Doc/library/ast.rst
index 10b3e2dae472e1..4ebbe0e5471c88 100644
--- a/Doc/library/ast.rst
+++ b/Doc/library/ast.rst
@@ -2483,26 +2483,26 @@ The following options are accepted:
 
 .. program:: ast
 
-.. cmdoption:: -h, --help
+.. option:: -h, --help
 
    Show the help message and exit.
 
-.. cmdoption:: -m <mode>
-               --mode <mode>
+.. option:: -m <mode>
+            --mode <mode>
 
    Specify what kind of code must be compiled, like the *mode* argument
    in :func:`parse`.
 
-.. cmdoption:: --no-type-comments
+.. option:: --no-type-comments
 
    Don't parse type comments.
 
-.. cmdoption:: -a, --include-attributes
+.. option:: -a, --include-attributes
 
    Include attributes such as line numbers and column offsets.
 
-.. cmdoption:: -i <indent>
-               --indent <indent>
+.. option:: -i <indent>
+            --indent <indent>
 
    Indentation of nodes in AST (number of spaces).
 
diff --git a/Doc/library/compileall.rst b/Doc/library/compileall.rst
index a7455aeb0ec1cd..df1eefab839cc1 100644
--- a/Doc/library/compileall.rst
+++ b/Doc/library/compileall.rst
@@ -26,28 +26,28 @@ compile Python sources.
 
 .. program:: compileall
 
-.. cmdoption:: directory ...
-               file ...
+.. option:: directory ...
+            file ...
 
    Positional arguments are files to compile or directories that contain
    source files, traversed recursively.  If no argument is given, behave as if
    the command line was :samp:`-l {<directories from sys.path>}`.
 
-.. cmdoption:: -l
+.. option:: -l
 
    Do not recurse into subdirectories, only compile source code files directly
    contained in the named or implied directories.
 
-.. cmdoption:: -f
+.. option:: -f
 
    Force rebuild even if timestamps are up-to-date.
 
-.. cmdoption:: -q
+.. option:: -q
 
    Do not print the list of files compiled. If passed once, error messages will
    still be printed. If passed twice (``-qq``), all output is suppressed.
 
-.. cmdoption:: -d destdir
+.. option:: -d destdir
 
    Directory prepended to the path to each file being compiled.  This will
    appear in compilation time tracebacks, and is also compiled in to the
@@ -55,45 +55,45 @@ compile Python sources.
    cases where the source file does not exist at the time the byte-code file is
    executed.
 
-.. cmdoption:: -s strip_prefix
-.. cmdoption:: -p prepend_prefix
+.. option:: -s strip_prefix
+.. option:: -p prepend_prefix
 
    Remove (``-s``) or append (``-p``) the given prefix of paths
    recorded in the ``.pyc`` files.
    Cannot be combined with ``-d``.
 
-.. cmdoption:: -x regex
+.. option:: -x regex
 
    regex is used to search the full path to each file considered for
    compilation, and if the regex produces a match, the file is skipped.
 
-.. cmdoption:: -i list
+.. option:: -i list
 
    Read the file ``list`` and add each line that it contains to the list of
    files and directories to compile.  If ``list`` is ``-``, read lines from
    ``stdin``.
 
-.. cmdoption:: -b
+.. option:: -b
 
    Write the byte-code files to their legacy locations and names, which may
    overwrite byte-code files created by another version of Python.  The default
    is to write files to their :pep:`3147` locations and names, which allows
    byte-code files from multiple versions of Python to coexist.
 
-.. cmdoption:: -r
+.. option:: -r
 
    Control the maximum recursion level for subdirectories.
    If this is given, then ``-l`` option will not be taken into account.
    :program:`python -m compileall <directory> -r 0` is equivalent to
    :program:`python -m compileall <directory> -l`.
 
-.. cmdoption:: -j N
+.. option:: -j N
 
    Use *N* workers to compile the files within the given directory.
-   If ``0`` is used, then the result of :func:`os.cpu_count()`
+   If ``0`` is used, then the result of :func:`os.process_cpu_count()`
    will be used.
 
-.. cmdoption:: --invalidation-mode [timestamp|checked-hash|unchecked-hash]
+.. option:: --invalidation-mode [timestamp|checked-hash|unchecked-hash]
 
    Control how the generated byte-code files are invalidated at runtime.
    The ``timestamp`` value, means that ``.pyc`` files with the source timestamp
@@ -106,17 +106,17 @@ compile Python sources.
    variable is not set, and ``checked-hash`` if the ``SOURCE_DATE_EPOCH``
    environment variable is set.
 
-.. cmdoption:: -o level
+.. option:: -o level
 
    Compile with the given optimization level. May be used multiple times
    to compile for multiple levels at a time (for example,
    ``compileall -o 1 -o 2``).
 
-.. cmdoption:: -e dir
+.. option:: -e dir
 
    Ignore symlinks pointing outside the given directory.
 
-.. cmdoption:: --hardlink-dupes
+.. option:: --hardlink-dupes
 
    If two ``.pyc`` files with different optimization level have
    the same content, use hard links to consolidate duplicate files.
diff --git a/Doc/library/concurrent.futures.rst b/Doc/library/concurrent.futures.rst
index 6503d1fcf70a32..dca51459a2df98 100644
--- a/Doc/library/concurrent.futures.rst
+++ b/Doc/library/concurrent.futures.rst
@@ -188,6 +188,10 @@ And::
       ThreadPoolExecutor now reuses idle worker threads before starting
       *max_workers* worker threads too.
 
+   .. versionchanged:: 3.13
+      Default value of *max_workers* is changed to
+      ``min(32, (os.process_cpu_count() or 1) + 4)``.
+
 
 .. _threadpoolexecutor-example:
 
@@ -243,7 +247,7 @@ to a :class:`ProcessPoolExecutor` will result in deadlock.
 
    An :class:`Executor` subclass that executes calls asynchronously using a pool
    of at most *max_workers* processes.  If *max_workers* is ``None`` or not
-   given, it will default to the number of processors on the machine.
+   given, it will default to :func:`os.process_cpu_count`.
    If *max_workers* is less than or equal to ``0``, then a :exc:`ValueError`
    will be raised.
    On Windows, *max_workers* must be less than or equal to ``61``. If it is not
@@ -301,6 +305,10 @@ to a :class:`ProcessPoolExecutor` will result in deadlock.
       different start method. See the :func:`os.fork` documentation for
       further explanation.
 
+   .. versionchanged:: 3.13
+      *max_workers* uses :func:`os.process_cpu_count` by default, instead of
+      :func:`os.cpu_count`.
+
 .. _processpoolexecutor-example:
 
 ProcessPoolExecutor Example
diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst
index fae0cf621323c8..cd85df8723a76b 100644
--- a/Doc/library/exceptions.rst
+++ b/Doc/library/exceptions.rst
@@ -220,10 +220,16 @@ The following exceptions are the exceptions that are usually raised.
    load a module.  Also raised when the "from list" in ``from ... import``
    has a name that cannot be found.
 
-   The :attr:`name` and :attr:`path` attributes can be set using keyword-only
-   arguments to the constructor. When set they represent the name of the module
-   that was attempted to be imported and the path to any file which triggered
-   the exception, respectively.
+   The optional *name* and *path* keyword-only arguments
+   set the corresponding attributes:
+
+   .. attribute:: name
+
+      The name of the module that was attempted to be imported.
+
+   .. attribute:: path
+
+      The path to any file which triggered the exception.
 
    .. versionchanged:: 3.3
       Added the :attr:`name` and :attr:`path` attributes.
diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst
index 6a4f2c76ae4e10..f931d0e399c9f2 100644
--- a/Doc/library/gzip.rst
+++ b/Doc/library/gzip.rst
@@ -268,23 +268,23 @@ Once executed the :mod:`gzip` module keeps the input file(s).
 Command line options
 ^^^^^^^^^^^^^^^^^^^^
 
-.. cmdoption:: file
+.. option:: file
 
    If *file* is not specified, read from :data:`sys.stdin`.
 
-.. cmdoption:: --fast
+.. option:: --fast
 
    Indicates the fastest compression method (less compression).
 
-.. cmdoption:: --best
+.. option:: --best
 
    Indicates the slowest compression method (best compression).
 
-.. cmdoption:: -d, --decompress
+.. option:: -d, --decompress
 
    Decompress the given file.
 
-.. cmdoption:: -h, --help
+.. option:: -h, --help
 
    Show the help message.
 
diff --git a/Doc/library/inspect.rst b/Doc/library/inspect.rst
index fe0ed135029f0f..d0c3dd761e4d56 100644
--- a/Doc/library/inspect.rst
+++ b/Doc/library/inspect.rst
@@ -1655,6 +1655,6 @@ By default, accepts the name of a module and prints the source of that
 module. A class or function within the module can be printed instead by
 appended a colon and the qualified name of the target object.
 
-.. cmdoption:: --details
+.. option:: --details
 
    Print information about the specified object rather than the source code
diff --git a/Doc/library/json.rst b/Doc/library/json.rst
index b337b5f9960e8e..0ce4b697145cb3 100644
--- a/Doc/library/json.rst
+++ b/Doc/library/json.rst
@@ -714,7 +714,7 @@ specified, :data:`sys.stdin` and :data:`sys.stdout` will be used respectively:
 Command line options
 ^^^^^^^^^^^^^^^^^^^^
 
-.. cmdoption:: infile
+.. option:: infile
 
    The JSON file to be validated or pretty-printed:
 
@@ -734,36 +734,36 @@ Command line options
 
    If *infile* is not specified, read from :data:`sys.stdin`.
 
-.. cmdoption:: outfile
+.. option:: outfile
 
    Write the output of the *infile* to the given *outfile*. Otherwise, write it
    to :data:`sys.stdout`.
 
-.. cmdoption:: --sort-keys
+.. option:: --sort-keys
 
    Sort the output of dictionaries alphabetically by key.
 
    .. versionadded:: 3.5
 
-.. cmdoption:: --no-ensure-ascii
+.. option:: --no-ensure-ascii
 
    Disable escaping of non-ascii characters, see :func:`json.dumps` for more information.
 
    .. versionadded:: 3.9
 
-.. cmdoption:: --json-lines
+.. option:: --json-lines
 
    Parse every input line as separate JSON object.
 
    .. versionadded:: 3.8
 
-.. cmdoption:: --indent, --tab, --no-indent, --compact
+.. option:: --indent, --tab, --no-indent, --compact
 
    Mutually exclusive options for whitespace control.
 
    .. versionadded:: 3.9
 
-.. cmdoption:: -h, --help
+.. option:: -h, --help
 
    Show the help message.
 
diff --git a/Doc/library/multiprocessing.rst b/Doc/library/multiprocessing.rst
index 2f0f1f800fdc94..d19f911dd7016c 100644
--- a/Doc/library/multiprocessing.rst
+++ b/Doc/library/multiprocessing.rst
@@ -996,13 +996,13 @@ Miscellaneous
 
    This number is not equivalent to the number of CPUs the current process can
    use.  The number of usable CPUs can be obtained with
-   ``len(os.sched_getaffinity(0))``
+   :func:`os.process_cpu_count`.
 
    When the number of CPUs cannot be determined a :exc:`NotImplementedError`
    is raised.
 
    .. seealso::
-      :func:`os.cpu_count`
+      :func:`os.cpu_count` and :func:`os.process_cpu_count`
 
 .. function:: current_process()
 
@@ -2214,7 +2214,7 @@ with the :class:`Pool` class.
    callbacks and has a parallel map implementation.
 
    *processes* is the number of worker processes to use.  If *processes* is
-   ``None`` then the number returned by :func:`os.cpu_count` is used.
+   ``None`` then the number returned by :func:`os.process_cpu_count` is used.
 
    If *initializer* is not ``None`` then each worker process will call
    ``initializer(*initargs)`` when it starts.
@@ -2249,6 +2249,10 @@ with the :class:`Pool` class.
    .. versionadded:: 3.4
       *context*
 
+   .. versionchanged:: 3.13
+      *processes* uses :func:`os.process_cpu_count` by default, instead of
+      :func:`os.cpu_count`.
+
    .. note::
 
       Worker processes within a :class:`Pool` typically live for the complete
@@ -2775,7 +2779,7 @@ worker threads rather than worker processes.
    :meth:`~multiprocessing.pool.Pool.terminate` manually.
 
    *processes* is the number of worker threads to use.  If *processes* is
-   ``None`` then the number returned by :func:`os.cpu_count` is used.
+   ``None`` then the number returned by :func:`os.process_cpu_count` is used.
 
    If *initializer* is not ``None`` then each worker process will call
    ``initializer(*initargs)`` when it starts.
diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst
index 6f9e0853bc8947..95933f56d50542 100644
--- a/Doc/library/os.path.rst
+++ b/Doc/library/os.path.rst
@@ -377,7 +377,8 @@ the :mod:`glob` module.)
 
    Return the canonical path of the specified filename, eliminating any symbolic
    links encountered in the path (if they are supported by the operating
-   system).
+   system). On Windows, this function will also resolve MS-DOS (also called 8.3)
+   style names such as ``C:\PROGRA~1`` to ``C:\Program Files``.
 
    If a path doesn't exist or a symlink loop is encountered, and *strict* is
    ``True``, :exc:`OSError` is raised. If *strict* is ``False``, the path is
diff --git a/Doc/library/os.rst b/Doc/library/os.rst
index 4ffd520f9ecd8b..141ab0bff5b4bf 100644
--- a/Doc/library/os.rst
+++ b/Doc/library/os.rst
@@ -5141,8 +5141,12 @@ operating system.
 
 .. function:: sched_getaffinity(pid, /)
 
-   Return the set of CPUs the process with PID *pid* (or the current process
-   if zero) is restricted to.
+   Return the set of CPUs the process with PID *pid* is restricted to.
+
+   If *pid* is zero, return the set of CPUs the calling thread of the current
+   process is restricted to.
+
+   See also the :func:`process_cpu_count` function.
 
 
 .. _os-path:
@@ -5183,12 +5187,11 @@ Miscellaneous System Information
 
 .. function:: cpu_count()
 
-   Return the number of CPUs in the system. Returns ``None`` if undetermined.
-
-   This number is not equivalent to the number of CPUs the current process can
-   use.  The number of usable CPUs can be obtained with
-   ``len(os.sched_getaffinity(0))``
+   Return the number of logical CPUs in the **system**. Returns ``None`` if
+   undetermined.
 
+   The :func:`process_cpu_count` function can be used to get the number of
+   logical CPUs usable by the calling thread of the **current process**.
 
    .. versionadded:: 3.4
 
@@ -5202,6 +5205,20 @@ Miscellaneous System Information
    .. availability:: Unix.
 
 
+.. function:: process_cpu_count()
+
+   Get the number of logical CPUs usable by the calling thread of the **current
+   process**. Returns ``None`` if undetermined. It can be less than
+   :func:`cpu_count` depending on the CPU affinity.
+
+   The :func:`cpu_count` function can be used to get the number of logical CPUs
+   in the **system**.
+
+   See also the :func:`sched_getaffinity` function.
+
+   .. versionadded:: 3.13
+
+
 .. function:: sysconf(name, /)
 
    Return integer-valued system configuration values. If the configuration value
diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst
index 48d6176d26bb8f..8ee89a003a339a 100644
--- a/Doc/library/pathlib.rst
+++ b/Doc/library/pathlib.rst
@@ -850,6 +850,42 @@ call fails (for example because the path doesn't exist).
    .. versionadded:: 3.5
 
 
+.. classmethod:: Path.from_uri(uri)
+
+   Return a new path object from parsing a 'file' URI conforming to
+   :rfc:`8089`. For example::
+
+       >>> Path.from_uri('file:///etc/hosts')
+       PosixPath('/etc/hosts')
+
+   On Windows, DOS device and UNC paths may be parsed from URIs::
+
+       >>> Path.from_uri('file:///c:/windows')
+       WindowsPath('c:/windows')
+       >>> Path.from_uri('file://server/share')
+       WindowsPath('//server/share')
+
+   Several variant forms are supported::
+
+       >>> Path.from_uri('file:////server/share')
+       WindowsPath('//server/share')
+       >>> Path.from_uri('file://///server/share')
+       WindowsPath('//server/share')
+       >>> Path.from_uri('file:c:/windows')
+       WindowsPath('c:/windows')
+       >>> Path.from_uri('file:/c|/windows')
+       WindowsPath('c:/windows')
+
+   :exc:`ValueError` is raised if the URI does not start with ``file:``, or
+   the parsed path isn't absolute.
+
+   :func:`os.fsdecode` is used to decode percent-escaped byte sequences, and
+   so file URIs are not portable across machines with different
+   :ref:`filesystem encodings <filesystem-encoding>`.
+
+   .. versionadded:: 3.13
+
+
 .. method:: Path.stat(*, follow_symlinks=True)
 
    Return a :class:`os.stat_result` object containing information about this path, like :func:`os.stat`.
diff --git a/Doc/library/pickletools.rst b/Doc/library/pickletools.rst
index 76f5b0cadf975a..41930f8cbe8412 100644
--- a/Doc/library/pickletools.rst
+++ b/Doc/library/pickletools.rst
@@ -53,24 +53,24 @@ Command line options
 
 .. program:: pickletools
 
-.. cmdoption:: -a, --annotate
+.. option:: -a, --annotate
 
    Annotate each line with a short opcode description.
 
-.. cmdoption:: -o, --output=<file>
+.. option:: -o, --output=<file>
 
    Name of a file where the output should be written.
 
-.. cmdoption:: -l, --indentlevel=<num>
+.. option:: -l, --indentlevel=<num>
 
    The number of blanks by which to indent a new MARK level.
 
-.. cmdoption:: -m, --memo
+.. option:: -m, --memo
 
    When multiple objects are disassembled, preserve memo between
    disassemblies.
 
-.. cmdoption:: -p, --preamble=<preamble>
+.. option:: -p, --preamble=<preamble>
 
    When more than one pickle file are specified, print given preamble
    before each disassembly.
diff --git a/Doc/library/py_compile.rst b/Doc/library/py_compile.rst
index 5501db8f87de81..38c416f9ad0305 100644
--- a/Doc/library/py_compile.rst
+++ b/Doc/library/py_compile.rst
@@ -139,13 +139,13 @@ not be compiled.
 
 .. program:: python -m py_compile
 
-.. cmdoption:: <file> ... <fileN>
-               -
+.. option:: <file> ... <fileN>
+            -
 
    Positional arguments are files to compile.  If ``-`` is the only
    parameter, the list of files is taken from standard input.
 
-.. cmdoption:: -q, --quiet
+.. option:: -q, --quiet
 
    Suppress errors output.
 
diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst
index 4390a8e22306fa..d1949d698f5614 100644
--- a/Doc/library/shutil.rst
+++ b/Doc/library/shutil.rst
@@ -476,6 +476,12 @@ Directory and files operations
       or ends with an extension that is in ``PATHEXT``; and filenames that
       have no extension can now be found.
 
+   .. versionchanged:: 3.12.1
+      On Windows, if *mode* includes ``os.X_OK``, executables with an
+      extension in ``PATHEXT`` will be preferred over executables without a
+      matching extension.
+      This brings behavior closer to that of Python 3.11.
+
 .. exception:: Error
 
    This exception collects exceptions that are raised during a multi-file
diff --git a/Doc/library/site.rst b/Doc/library/site.rst
index ea3b2e996574ef..2dc9fb09d727e2 100644
--- a/Doc/library/site.rst
+++ b/Doc/library/site.rst
@@ -19,7 +19,7 @@ Importing this module will append site-specific paths to the module search path
 and add a few builtins, unless :option:`-S` was used.  In that case, this module
 can be safely imported with no automatic modifications to the module search path
 or additions to the builtins.  To explicitly trigger the usual site-specific
-additions, call the :func:`site.main` function.
+additions, call the :func:`main` function.
 
 .. versionchanged:: 3.3
    Importing the module used to trigger paths manipulation even when using
@@ -109,32 +109,40 @@ directory precedes the :file:`foo` directory because :file:`bar.pth` comes
 alphabetically before :file:`foo.pth`; and :file:`spam` is omitted because it is
 not mentioned in either path configuration file.
 
-.. index:: pair: module; sitecustomize
+:mod:`sitecustomize`
+--------------------
+
+.. module:: sitecustomize
 
 After these path manipulations, an attempt is made to import a module named
 :mod:`sitecustomize`, which can perform arbitrary site-specific customizations.
 It is typically created by a system administrator in the site-packages
 directory.  If this import fails with an :exc:`ImportError` or its subclass
-exception, and the exception's :attr:`name` attribute equals to ``'sitecustomize'``,
+exception, and the exception's :attr:`~ImportError.name`
+attribute equals ``'sitecustomize'``,
 it is silently ignored.  If Python is started without output streams available, as
 with :file:`pythonw.exe` on Windows (which is used by default to start IDLE),
 attempted output from :mod:`sitecustomize` is ignored.  Any other exception
 causes a silent and perhaps mysterious failure of the process.
 
-.. index:: pair: module; usercustomize
+:mod:`usercustomize`
+--------------------
+
+.. module:: usercustomize
 
 After this, an attempt is made to import a module named :mod:`usercustomize`,
 which can perform arbitrary user-specific customizations, if
-:data:`ENABLE_USER_SITE` is true.  This file is intended to be created in the
+:data:`~site.ENABLE_USER_SITE` is true.  This file is intended to be created in the
 user site-packages directory (see below), which is part of ``sys.path`` unless
 disabled by :option:`-s`.  If this import fails with an :exc:`ImportError` or
-its subclass exception, and the exception's :attr:`name` attribute equals to
-``'usercustomize'``, it is silently ignored.
+its subclass exception, and the exception's :attr:`~ImportError.name`
+attribute equals ``'usercustomize'``, it is silently ignored.
 
 Note that for some non-Unix systems, ``sys.prefix`` and ``sys.exec_prefix`` are
 empty, and the path manipulations are skipped; however the import of
 :mod:`sitecustomize` and :mod:`usercustomize` is still attempted.
 
+.. currentmodule:: site
 
 .. _rlcompleter-config:
 
@@ -191,7 +199,7 @@ Module contents
    :file:`~/Library/Python/{X.Y}` for macOS framework builds, and
    :file:`{%APPDATA%}\\Python` for Windows.  This value is used to
    compute the installation directories for scripts, data files, Python modules,
-   etc. for the user installation scheme.
+   etc. for the :ref:`user installation scheme <sysconfig-user-scheme>`.
    See also :envvar:`PYTHONUSERBASE`.
 
 
@@ -258,11 +266,11 @@ If it is called without arguments, it will print the contents of
 :data:`USER_BASE` and whether the directory exists, then the same thing for
 :data:`USER_SITE`, and finally the value of :data:`ENABLE_USER_SITE`.
 
-.. cmdoption:: --user-base
+.. option:: --user-base
 
    Print the path to the user base directory.
 
-.. cmdoption:: --user-site
+.. option:: --user-site
 
    Print the path to the user site-packages directory.
 
diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst
index 0abdab52340dfd..aa34bcc9388e1c 100644
--- a/Doc/library/sqlite3.rst
+++ b/Doc/library/sqlite3.rst
@@ -992,9 +992,8 @@ Connection objects
          Added support for disabling the authorizer using ``None``.
 
       .. versionchanged:: 3.13
-
-      Passing *authorizer_callback* as a keyword argument to is deprecated.
-      The parameter will become positional-only in Python 3.15.
+         Passing *authorizer_callback* as a keyword argument is deprecated.
+         The parameter will become positional-only in Python 3.15.
 
 
    .. method:: set_progress_handler(progress_handler, n)
@@ -1012,9 +1011,8 @@ Connection objects
       exception.
 
       .. versionchanged:: 3.13
-
-      Passing *progress_handler* as a keyword argument to is deprecated.
-      The parameter will become positional-only in Python 3.15.
+         Passing *progress_handler* as a keyword argument is deprecated.
+         The parameter will become positional-only in Python 3.15.
 
 
    .. method:: set_trace_callback(trace_callback)
@@ -1041,9 +1039,8 @@ Connection objects
       .. versionadded:: 3.3
 
       .. versionchanged:: 3.13
-
-      Passing *trace_callback* as a keyword argument to is deprecated.
-      The parameter will become positional-only in Python 3.15.
+         Passing *trace_callback* as a keyword argument is deprecated.
+         The parameter will become positional-only in Python 3.15.
 
 
    .. method:: enable_load_extension(enabled, /)
@@ -2440,9 +2437,9 @@ or if :attr:`~Connection.autocommit` is ``True``,
 the context manager does nothing.
 
 .. note::
-
    The context manager neither implicitly opens a new transaction
-   nor closes the connection.
+   nor closes the connection. If you need a closing context manager, consider
+   using :func:`contextlib.closing`.
 
 .. testcode::
 
diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst
index f3c1bf20ae3ac8..5c8ad3a7dd7380 100644
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -585,7 +585,7 @@ However, for reading convenience, most of the examples show sorted sequences.
 
    The *data* can be any iterable containing sample data.  For meaningful
    results, the number of data points in *data* should be larger than *n*.
-   Raises :exc:`StatisticsError` if there are not at least two data points.
+   Raises :exc:`StatisticsError` if there is not at least one data point.
 
    The cut points are linearly interpolated from the
    two nearest data points.  For example, if a cut point falls one-third
@@ -625,6 +625,11 @@ However, for reading convenience, most of the examples show sorted sequences.
 
    .. versionadded:: 3.8
 
+   .. versionchanged:: 3.13
+      No longer raises an exception for an input with only a single data point.
+      This allows quantile estimates to be built up one sample point
+      at a time, becoming gradually more refined with each new data point.
+
 .. function:: covariance(x, y, /)
 
    Return the sample covariance of two inputs *x* and *y*. Covariance
diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst
index a351559a84f1ce..f45fd561d2bad3 100644
--- a/Doc/library/stdtypes.rst
+++ b/Doc/library/stdtypes.rst
@@ -804,6 +804,7 @@ number, :class:`float`, or :class:`complex`::
            hash_value = -2
        return hash_value
 
+.. _bltin-boolean-values:
 .. _typebool:
 
 Boolean Type - :class:`bool`
diff --git a/Doc/library/sys.monitoring.rst b/Doc/library/sys.monitoring.rst
index 7b02b95fd766a7..5dcbdaf8e5d0e4 100644
--- a/Doc/library/sys.monitoring.rst
+++ b/Doc/library/sys.monitoring.rst
@@ -147,7 +147,7 @@ by another event:
 * C_RAISE
 * C_RETURN
 
-The ``C_RETURN`` and ``C_RAISE`` events are are controlled by the ``CALL``
+The ``C_RETURN`` and ``C_RAISE`` events are controlled by the ``CALL``
 event. ``C_RETURN`` and ``C_RAISE`` events will only be seen if the
 corresponding ``CALL`` event is being monitored.
 
diff --git a/Doc/library/sysconfig.rst b/Doc/library/sysconfig.rst
index c625c1e1d72954..905abc3a7c9f9b 100644
--- a/Doc/library/sysconfig.rst
+++ b/Doc/library/sysconfig.rst
@@ -20,6 +20,7 @@ The :mod:`sysconfig` module provides access to Python's configuration
 information like the list of installation paths and the configuration variables
 relevant for the current platform.
 
+
 Configuration variables
 -----------------------
 
@@ -60,6 +61,7 @@ Example of usage::
    >>> sysconfig.get_config_vars('AR', 'CXX')
    ['ar', 'g++']
 
+
 .. _installation_paths:
 
 Installation paths
@@ -68,27 +70,24 @@ Installation paths
 Python uses an installation scheme that differs depending on the platform and on
 the installation options.  These schemes are stored in :mod:`sysconfig` under
 unique identifiers based on the value returned by :const:`os.name`.
-
-Every new component that is installed using :mod:`!distutils` or a
-Distutils-based system will follow the same scheme to copy its file in the right
-places.
+The schemes are used by package installers to determine where to copy files to.
 
 Python currently supports nine schemes:
 
 - *posix_prefix*: scheme for POSIX platforms like Linux or macOS.  This is
   the default scheme used when Python or a component is installed.
-- *posix_home*: scheme for POSIX platforms used when a *home* option is used
-  upon installation.  This scheme is used when a component is installed through
-  Distutils with a specific home prefix.
-- *posix_user*: scheme for POSIX platforms used when a component is installed
-  through Distutils and the *user* option is used.  This scheme defines paths
-  located under the user home directory.
+- *posix_home*: scheme for POSIX platforms, when the *home* option is used.
+  This scheme defines paths located under a specific home prefix.
+- *posix_user*: scheme for POSIX platforms, when the *user* option is used.
+  This scheme defines paths located under the user's home directory
+  (:const:`site.USER_BASE`).
 - *posix_venv*: scheme for :mod:`Python virtual environments <venv>` on POSIX
   platforms; by default it is the same as *posix_prefix*.
-- *nt*: scheme for NT platforms like Windows.
-- *nt_user*: scheme for NT platforms, when the *user* option is used.
-- *nt_venv*: scheme for :mod:`Python virtual environments <venv>` on NT
-  platforms; by default it is the same as *nt*.
+- *nt*: scheme for Windows.
+  This is the default scheme used when Python or a component is installed.
+- *nt_user*: scheme for Windows, when the *user* option is used.
+- *nt_venv*: scheme for :mod:`Python virtual environments <venv>` on Windows;
+  by default it is the same as *nt*.
 - *venv*: a scheme with values from either *posix_venv* or *nt_venv* depending
   on the platform Python runs on.
 - *osx_framework_user*: scheme for macOS, when the *user* option is used.
@@ -101,7 +100,7 @@ identifier.  Python currently uses eight paths:
 - *platstdlib*: directory containing the standard Python library files that are
   platform-specific.
 - *platlib*: directory for site-specific, platform-specific files.
-- *purelib*: directory for site-specific, non-platform-specific files.
+- *purelib*: directory for site-specific, non-platform-specific files ('pure' Python).
 - *include*: directory for non-platform-specific header files for
   the Python C-API.
 - *platinclude*: directory for platform-specific header files for
@@ -109,7 +108,157 @@ identifier.  Python currently uses eight paths:
 - *scripts*: directory for script files.
 - *data*: directory for data files.
 
-:mod:`sysconfig` provides some functions to determine these paths.
+
+.. _sysconfig-user-scheme:
+
+User scheme
+-----------
+
+This scheme is designed to be the most convenient solution for users that don't
+have write permission to the global site-packages directory or don't want to
+install into it.
+
+Files will be installed into subdirectories of :const:`site.USER_BASE` (written
+as :file:`{userbase}` hereafter).  This scheme installs pure Python modules and
+extension modules in the same location (also known as :const:`site.USER_SITE`).
+
+``posix_user``
+^^^^^^^^^^^^^^
+
+============== ===========================================================
+Path           Installation directory
+============== ===========================================================
+*stdlib*       :file:`{userbase}/lib/python{X.Y}`
+*platstdlib*   :file:`{userbase}/lib/python{X.Y}`
+*platlib*      :file:`{userbase}/lib/python{X.Y}/site-packages`
+*purelib*      :file:`{userbase}/lib/python{X.Y}/site-packages`
+*include*      :file:`{userbase}/include/python{X.Y}`
+*scripts*      :file:`{userbase}/bin`
+*data*         :file:`{userbase}`
+============== ===========================================================
+
+``nt_user``
+^^^^^^^^^^^
+
+============== ===========================================================
+Path           Installation directory
+============== ===========================================================
+*stdlib*       :file:`{userbase}\\Python{XY}`
+*platstdlib*   :file:`{userbase}\\Python{XY}`
+*platlib*      :file:`{userbase}\\Python{XY}\\site-packages`
+*purelib*      :file:`{userbase}\\Python{XY}\\site-packages`
+*include*      :file:`{userbase}\\Python{XY}\\Include`
+*scripts*      :file:`{userbase}\\Python{XY}\\Scripts`
+*data*         :file:`{userbase}`
+============== ===========================================================
+
+``osx_framework_user``
+^^^^^^^^^^^^^^^^^^^^^^
+
+============== ===========================================================
+Path           Installation directory
+============== ===========================================================
+*stdlib*       :file:`{userbase}/lib/python`
+*platstdlib*   :file:`{userbase}/lib/python`
+*platlib*      :file:`{userbase}/lib/python/site-packages`
+*purelib*      :file:`{userbase}/lib/python/site-packages`
+*include*      :file:`{userbase}/include/python{X.Y}`
+*scripts*      :file:`{userbase}/bin`
+*data*         :file:`{userbase}`
+============== ===========================================================
+
+
+.. _sysconfig-home-scheme:
+
+Home scheme
+-----------
+
+The idea behind the "home scheme" is that you build and maintain a personal
+stash of Python modules.  This scheme's name is derived from the idea of a
+"home" directory on Unix, since it's not unusual for a Unix user to make their
+home directory have a layout similar to :file:`/usr/` or :file:`/usr/local/`.
+This scheme can be used by anyone, regardless of the operating system they
+are installing for.
+
+``posix_home``
+^^^^^^^^^^^^^^
+
+============== ===========================================================
+Path           Installation directory
+============== ===========================================================
+*stdlib*       :file:`{home}/lib/python`
+*platstdlib*   :file:`{home}/lib/python`
+*platlib*      :file:`{home}/lib/python`
+*purelib*      :file:`{home}/lib/python`
+*include*      :file:`{home}/include/python`
+*platinclude*  :file:`{home}/include/python`
+*scripts*      :file:`{home}/bin`
+*data*         :file:`{home}`
+============== ===========================================================
+
+
+.. _sysconfig-prefix-scheme:
+
+Prefix scheme
+-------------
+
+The "prefix scheme" is useful when you wish to use one Python installation to
+perform the build/install (i.e., to run the setup script), but install modules
+into the third-party module directory of a different Python installation (or
+something that looks like a different Python installation).  If this sounds a
+trifle unusual, it is---that's why the user and home schemes come before.  However,
+there are at least two known cases where the prefix scheme will be useful.
+
+First, consider that many Linux distributions put Python in :file:`/usr`, rather
+than the more traditional :file:`/usr/local`.  This is entirely appropriate,
+since in those cases Python is part of "the system" rather than a local add-on.
+However, if you are installing Python modules from source, you probably want
+them to go in :file:`/usr/local/lib/python{X.Y}` rather than
+:file:`/usr/lib/python{X.Y}`.
+
+Another possibility is a network filesystem where the name used to write to a
+remote directory is different from the name used to read it: for example, the
+Python interpreter accessed as :file:`/usr/local/bin/python` might search for
+modules in :file:`/usr/local/lib/python{X.Y}`, but those modules would have to
+be installed to, say, :file:`/mnt/{@server}/export/lib/python{X.Y}`.
+
+``posix_prefix``
+^^^^^^^^^^^^^^^^
+
+============== ==========================================================
+Path           Installation directory
+============== ==========================================================
+*stdlib*       :file:`{prefix}/lib/python{X.Y}`
+*platstdlib*   :file:`{prefix}/lib/python{X.Y}`
+*platlib*      :file:`{prefix}/lib/python{X.Y}/site-packages`
+*purelib*      :file:`{prefix}/lib/python{X.Y}/site-packages`
+*include*      :file:`{prefix}/include/python{X.Y}`
+*platinclude*  :file:`{prefix}/include/python{X.Y}`
+*scripts*      :file:`{prefix}/bin`
+*data*         :file:`{prefix}`
+============== ==========================================================
+
+``nt``
+^^^^^^
+
+============== ==========================================================
+Path           Installation directory
+============== ==========================================================
+*stdlib*       :file:`{prefix}\\Lib`
+*platstdlib*   :file:`{prefix}\\Lib`
+*platlib*      :file:`{prefix}\\Lib\\site-packages`
+*purelib*      :file:`{prefix}\\Lib\\site-packages`
+*include*      :file:`{prefix}\\Include`
+*platinclude*  :file:`{prefix}\\Include`
+*scripts*      :file:`{prefix}\\Scripts`
+*data*         :file:`{prefix}`
+============== ==========================================================
+
+
+Installation path functions
+---------------------------
+
+:mod:`sysconfig` provides some functions to determine these installation paths.
 
 .. function:: get_scheme_names()
 
diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst
index 62d67bc577c7d0..3e5723a66780ca 100644
--- a/Doc/library/tarfile.rst
+++ b/Doc/library/tarfile.rst
@@ -1156,31 +1156,31 @@ For a list of the files in a tar archive, use the :option:`-l` option:
 Command-line options
 ~~~~~~~~~~~~~~~~~~~~
 
-.. cmdoption:: -l <tarfile>
-               --list <tarfile>
+.. option:: -l <tarfile>
+            --list <tarfile>
 
    List files in a tarfile.
 
-.. cmdoption:: -c <tarfile> <source1> ... <sourceN>
-               --create <tarfile> <source1> ... <sourceN>
+.. option:: -c <tarfile> <source1> ... <sourceN>
+            --create <tarfile> <source1> ... <sourceN>
 
    Create tarfile from source files.
 
-.. cmdoption:: -e <tarfile> [<output_dir>]
-               --extract <tarfile> [<output_dir>]
+.. option:: -e <tarfile> [<output_dir>]
+            --extract <tarfile> [<output_dir>]
 
    Extract tarfile into the current directory if *output_dir* is not specified.
 
-.. cmdoption:: -t <tarfile>
-               --test <tarfile>
+.. option:: -t <tarfile>
+            --test <tarfile>
 
    Test whether the tarfile is valid or not.
 
-.. cmdoption:: -v, --verbose
+.. option:: -v, --verbose
 
    Verbose output.
 
-.. cmdoption:: --filter <filtername>
+.. option:: --filter <filtername>
 
    Specifies the *filter* for ``--extract``.
    See :ref:`tarfile-extraction-filter` for details.
diff --git a/Doc/library/timeit.rst b/Doc/library/timeit.rst
index b3d2a1b9e0600f..616f8365b80f6c 100644
--- a/Doc/library/timeit.rst
+++ b/Doc/library/timeit.rst
@@ -151,7 +151,7 @@ The module defines three convenience functions and a public class:
       so that the total time >= 0.2 second, returning the eventual
       (number of loops, time taken for that number of loops). It calls
       :meth:`.timeit` with increasing numbers from the sequence 1, 2, 5,
-      10, 20, 50, ... until the time taken is at least 0.2 second.
+      10, 20, 50, ... until the time taken is at least 0.2 seconds.
 
       If *callback* is given and is not ``None``, it will be called after
       each trial with two arguments: ``callback(number, time_taken)``.
@@ -214,36 +214,36 @@ Where the following options are understood:
 
 .. program:: timeit
 
-.. cmdoption:: -n N, --number=N
+.. option:: -n N, --number=N
 
    how many times to execute 'statement'
 
-.. cmdoption:: -r N, --repeat=N
+.. option:: -r N, --repeat=N
 
    how many times to repeat the timer (default 5)
 
-.. cmdoption:: -s S, --setup=S
+.. option:: -s S, --setup=S
 
    statement to be executed once initially (default ``pass``)
 
-.. cmdoption:: -p, --process
+.. option:: -p, --process
 
    measure process time, not wallclock time, using :func:`time.process_time`
    instead of :func:`time.perf_counter`, which is the default
 
    .. versionadded:: 3.3
 
-.. cmdoption:: -u, --unit=U
+.. option:: -u, --unit=U
 
    specify a time unit for timer output; can select ``nsec``, ``usec``, ``msec``, or ``sec``
 
    .. versionadded:: 3.5
 
-.. cmdoption:: -v, --verbose
+.. option:: -v, --verbose
 
    print raw timing results; repeat for more digits precision
 
-.. cmdoption:: -h, --help
+.. option:: -h, --help
 
    print a short usage message and exit
 
diff --git a/Doc/library/tokenize.rst b/Doc/library/tokenize.rst
index bffe93006edc7b..92bdb052267a68 100644
--- a/Doc/library/tokenize.rst
+++ b/Doc/library/tokenize.rst
@@ -166,11 +166,11 @@ The following options are accepted:
 
 .. program:: tokenize
 
-.. cmdoption:: -h, --help
+.. option:: -h, --help
 
    show this help message and exit
 
-.. cmdoption:: -e, --exact
+.. option:: -e, --exact
 
    display token names using the exact type
 
diff --git a/Doc/library/trace.rst b/Doc/library/trace.rst
index 40cf198f1287d7..e9b59a6d186ba2 100644
--- a/Doc/library/trace.rst
+++ b/Doc/library/trace.rst
@@ -34,11 +34,11 @@ all Python modules imported during the execution into the current directory.
 
 .. program:: trace
 
-.. cmdoption:: --help
+.. option:: --help
 
    Display usage and exit.
 
-.. cmdoption:: --version
+.. option:: --version
 
    Display the version of the module and exit.
 
@@ -56,28 +56,28 @@ the :option:`--trace <-t>` and :option:`--count <-c>` options. When
 
 .. program:: trace
 
-.. cmdoption:: -c, --count
+.. option:: -c, --count
 
    Produce a set of annotated listing files upon program completion that shows
    how many times each statement was executed.  See also
    :option:`--coverdir <-C>`, :option:`--file <-f>` and
    :option:`--no-report <-R>` below.
 
-.. cmdoption:: -t, --trace
+.. option:: -t, --trace
 
    Display lines as they are executed.
 
-.. cmdoption:: -l, --listfuncs
+.. option:: -l, --listfuncs
 
    Display the functions executed by running the program.
 
-.. cmdoption:: -r, --report
+.. option:: -r, --report
 
    Produce an annotated list from an earlier program run that used the
    :option:`--count <-c>` and :option:`--file <-f>` option.  This does not
    execute any code.
 
-.. cmdoption:: -T, --trackcalls
+.. option:: -T, --trackcalls
 
    Display the calling relationships exposed by running the program.
 
@@ -86,33 +86,33 @@ Modifiers
 
 .. program:: trace
 
-.. cmdoption:: -f, --file=<file>
+.. option:: -f, --file=<file>
 
    Name of a file to accumulate counts over several tracing runs.  Should be
    used with the :option:`--count <-c>` option.
 
-.. cmdoption:: -C, --coverdir=<dir>
+.. option:: -C, --coverdir=<dir>
 
    Directory where the report files go.  The coverage report for
    ``package.module`` is written to file :file:`{dir}/{package}/{module}.cover`.
 
-.. cmdoption:: -m, --missing
+.. option:: -m, --missing
 
    When generating annotated listings, mark lines which were not executed with
    ``>>>>>>``.
 
-.. cmdoption:: -s, --summary
+.. option:: -s, --summary
 
    When using :option:`--count <-c>` or :option:`--report <-r>`, write a brief
    summary to stdout for each file processed.
 
-.. cmdoption:: -R, --no-report
+.. option:: -R, --no-report
 
    Do not generate annotated listings.  This is useful if you intend to make
    several runs with :option:`--count <-c>`, and then produce a single set of
    annotated listings at the end.
 
-.. cmdoption:: -g, --timing
+.. option:: -g, --timing
 
    Prefix each line with the time since the program started.  Only used while
    tracing.
@@ -124,12 +124,12 @@ These options may be repeated multiple times.
 
 .. program:: trace
 
-.. cmdoption:: --ignore-module=<mod>
+.. option:: --ignore-module=<mod>
 
    Ignore each of the given module names and its submodules (if it is a
    package).  The argument can be a list of names separated by a comma.
 
-.. cmdoption:: --ignore-dir=<dir>
+.. option:: --ignore-dir=<dir>
 
    Ignore all modules and packages in the named directory and subdirectories.
    The argument can be a list of directories separated by :data:`os.pathsep`.
diff --git a/Doc/library/tty.rst b/Doc/library/tty.rst
index fc7f98c7931fa5..a4777772e1fc6c 100644
--- a/Doc/library/tty.rst
+++ b/Doc/library/tty.rst
@@ -43,6 +43,9 @@ The :mod:`tty` module defines the following functions:
    :func:`termios.tcsetattr`. The return value of :func:`termios.tcgetattr`
    is saved before setting *fd* to raw mode; this value is returned.
 
+   .. versionchanged:: 3.12
+      The return value is now the original tty attributes, instead of ``None``.
+
 
 .. function:: setcbreak(fd, when=termios.TCSAFLUSH)
 
@@ -51,6 +54,9 @@ The :mod:`tty` module defines the following functions:
    :func:`termios.tcsetattr`. The return value of :func:`termios.tcgetattr`
    is saved before setting *fd* to cbreak mode; this value is returned.
 
+   .. versionchanged:: 3.12
+      The return value is now the original tty attributes, instead of ``None``.
+
 
 .. seealso::
 
diff --git a/Doc/library/unittest.rst b/Doc/library/unittest.rst
index 4c28e8fae8b088..21abc583f853a7 100644
--- a/Doc/library/unittest.rst
+++ b/Doc/library/unittest.rst
@@ -206,13 +206,13 @@ Command-line options
 
 .. program:: unittest
 
-.. cmdoption:: -b, --buffer
+.. option:: -b, --buffer
 
    The standard output and standard error streams are buffered during the test
    run. Output during a passing test is discarded. Output is echoed normally
    on test fail or error and is added to the failure messages.
 
-.. cmdoption:: -c, --catch
+.. option:: -c, --catch
 
    :kbd:`Control-C` during the test run waits for the current test to end and then
    reports all the results so far. A second :kbd:`Control-C` raises the normal
@@ -220,11 +220,11 @@ Command-line options
 
    See `Signal Handling`_ for the functions that provide this functionality.
 
-.. cmdoption:: -f, --failfast
+.. option:: -f, --failfast
 
    Stop the test run on the first error or failure.
 
-.. cmdoption:: -k
+.. option:: -k
 
    Only run test methods and classes that match the pattern or substring.
    This option may be used multiple times, in which case all test cases that
@@ -240,11 +240,11 @@ Command-line options
    For example, ``-k foo`` matches ``foo_tests.SomeTest.test_something``,
    ``bar_tests.SomeTest.test_foo``, but not ``bar_tests.FooTest.test_something``.
 
-.. cmdoption:: --locals
+.. option:: --locals
 
    Show local variables in tracebacks.
 
-.. cmdoption:: --durations N
+.. option:: --durations N
 
    Show the N slowest test cases (N=0 for all).
 
@@ -292,19 +292,19 @@ The ``discover`` sub-command has the following options:
 
 .. program:: unittest discover
 
-.. cmdoption:: -v, --verbose
+.. option:: -v, --verbose
 
    Verbose output
 
-.. cmdoption:: -s, --start-directory directory
+.. option:: -s, --start-directory directory
 
    Directory to start discovery (``.`` default)
 
-.. cmdoption:: -p, --pattern pattern
+.. option:: -p, --pattern pattern
 
    Pattern to match test files (``test*.py`` default)
 
-.. cmdoption:: -t, --top-level-directory directory
+.. option:: -t, --top-level-directory directory
 
    Top level directory of project (defaults to start directory)
 
diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst
index adf01770656754..e2d231da38fd9a 100644
--- a/Doc/library/uuid.rst
+++ b/Doc/library/uuid.rst
@@ -289,25 +289,25 @@ The following options are accepted:
 
 .. program:: uuid
 
-.. cmdoption:: -h, --help
+.. option:: -h, --help
 
    Show the help message and exit.
 
-.. cmdoption:: -u <uuid>
-               --uuid <uuid>
+.. option:: -u <uuid>
+            --uuid <uuid>
 
    Specify the function name to use to generate the uuid. By default :func:`uuid4`
    is used.
 
-.. cmdoption:: -n <namespace>
-               --namespace <namespace>
+.. option:: -n <namespace>
+            --namespace <namespace>
 
    The namespace is a ``UUID``, or ``@ns`` where ``ns`` is a well-known predefined UUID
    addressed by namespace name. Such as ``@dns``, ``@url``, ``@oid``, and ``@x500``.
    Only required for :func:`uuid3` / :func:`uuid5` functions.
 
-.. cmdoption:: -N <name>
-               --name <name>
+.. option:: -N <name>
+            --name <name>
 
    The name used as part of generating the uuid. Only required for
    :func:`uuid3` / :func:`uuid5` functions.
diff --git a/Doc/library/zipapp.rst b/Doc/library/zipapp.rst
index 7c01fc102fca07..104afca23a20b4 100644
--- a/Doc/library/zipapp.rst
+++ b/Doc/library/zipapp.rst
@@ -54,7 +54,7 @@ The following options are understood:
 
 .. program:: zipapp
 
-.. cmdoption:: -o <output>, --output=<output>
+.. option:: -o <output>, --output=<output>
 
    Write the output to a file named *output*.  If this option is not specified,
    the output filename will be the same as the input *source*, with the
@@ -64,13 +64,13 @@ The following options are understood:
    An output filename must be specified if the *source* is an archive (and in
    that case, *output* must not be the same as *source*).
 
-.. cmdoption:: -p <interpreter>, --python=<interpreter>
+.. option:: -p <interpreter>, --python=<interpreter>
 
    Add a ``#!`` line to the archive specifying *interpreter* as the command
    to run.  Also, on POSIX, make the archive executable.  The default is to
    write no ``#!`` line, and not make the file executable.
 
-.. cmdoption:: -m <mainfn>, --main=<mainfn>
+.. option:: -m <mainfn>, --main=<mainfn>
 
    Write a ``__main__.py`` file to the archive that executes *mainfn*.  The
    *mainfn* argument should have the form "pkg.mod:fn", where "pkg.mod" is a
@@ -79,7 +79,7 @@ The following options are understood:
 
    :option:`--main` cannot be specified when copying an archive.
 
-.. cmdoption:: -c, --compress
+.. option:: -c, --compress
 
    Compress files with the deflate method, reducing the size of the output
    file. By default, files are stored uncompressed in the archive.
@@ -88,13 +88,13 @@ The following options are understood:
 
    .. versionadded:: 3.7
 
-.. cmdoption:: --info
+.. option:: --info
 
    Display the interpreter embedded in the archive, for diagnostic purposes.  In
    this case, any other options are ignored and SOURCE must be an archive, not a
    directory.
 
-.. cmdoption:: -h, --help
+.. option:: -h, --help
 
    Print a short usage message and exit.
 
diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst
index bd951e4872f113..a77e49a7643826 100644
--- a/Doc/library/zipfile.rst
+++ b/Doc/library/zipfile.rst
@@ -906,27 +906,27 @@ For a list of the files in a ZIP archive, use the :option:`-l` option:
 Command-line options
 ~~~~~~~~~~~~~~~~~~~~
 
-.. cmdoption:: -l <zipfile>
-               --list <zipfile>
+.. option:: -l <zipfile>
+            --list <zipfile>
 
    List files in a zipfile.
 
-.. cmdoption:: -c <zipfile> <source1> ... <sourceN>
-               --create <zipfile> <source1> ... <sourceN>
+.. option:: -c <zipfile> <source1> ... <sourceN>
+            --create <zipfile> <source1> ... <sourceN>
 
    Create zipfile from source files.
 
-.. cmdoption:: -e <zipfile> <output_dir>
-               --extract <zipfile> <output_dir>
+.. option:: -e <zipfile> <output_dir>
+            --extract <zipfile> <output_dir>
 
    Extract zipfile into target directory.
 
-.. cmdoption:: -t <zipfile>
-               --test <zipfile>
+.. option:: -t <zipfile>
+            --test <zipfile>
 
    Test whether the zipfile is valid or not.
 
-.. cmdoption:: --metadata-encoding <encoding>
+.. option:: --metadata-encoding <encoding>
 
    Specify encoding of member names for :option:`-l`, :option:`-e` and
    :option:`-t`.
diff --git a/Doc/requirements-oldest-sphinx.txt b/Doc/requirements-oldest-sphinx.txt
index d3ef5bc17650ae..5de739fc10b085 100644
--- a/Doc/requirements-oldest-sphinx.txt
+++ b/Doc/requirements-oldest-sphinx.txt
@@ -13,16 +13,16 @@ python-docs-theme>=2022.1
 # Sphinx 4.2 comes from ``needs_sphinx = '4.2'`` in ``Doc/conf.py``.
 
 alabaster==0.7.13
-Babel==2.12.1
+Babel==2.13.0
 certifi==2023.7.22
-charset-normalizer==3.2.0
+charset-normalizer==3.3.0
 colorama==0.4.6
-docutils==0.16
+docutils==0.17.1
 idna==3.4
 imagesize==1.4.1
-Jinja2==2.11.3
-MarkupSafe==1.1.1
-packaging==23.1
+Jinja2==3.1.2
+MarkupSafe==2.1.3
+packaging==23.2
 Pygments==2.16.1
 requests==2.31.0
 snowballstemmer==2.2.0
@@ -33,4 +33,4 @@ sphinxcontrib-htmlhelp==2.0.1
 sphinxcontrib-jsmath==1.0.1
 sphinxcontrib-qthelp==1.0.3
 sphinxcontrib-serializinghtml==1.1.5
-urllib3==2.0.4
+urllib3==2.0.6
diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore
index f217da9052ca78..fbc9fc33ecea1b 100644
--- a/Doc/tools/.nitignore
+++ b/Doc/tools/.nitignore
@@ -106,7 +106,6 @@ Doc/library/select.rst
 Doc/library/selectors.rst
 Doc/library/shelve.rst
 Doc/library/signal.rst
-Doc/library/site.rst
 Doc/library/smtplib.rst
 Doc/library/socket.rst
 Doc/library/socketserver.rst
@@ -114,7 +113,6 @@ Doc/library/ssl.rst
 Doc/library/stdtypes.rst
 Doc/library/string.rst
 Doc/library/subprocess.rst
-Doc/library/sys_path_init.rst
 Doc/library/syslog.rst
 Doc/library/tarfile.rst
 Doc/library/tempfile.rst
diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst
index 921b6a6961c7b2..f68a2251f06d4a 100644
--- a/Doc/using/cmdline.rst
+++ b/Doc/using/cmdline.rst
@@ -59,7 +59,7 @@ all consecutive arguments will end up in :data:`sys.argv` -- note that the first
 element, subscript zero (``sys.argv[0]``), is a string reflecting the program's
 source.
 
-.. cmdoption:: -c <command>
+.. option:: -c <command>
 
    Execute the Python code in *command*.  *command* can be one or more
    statements separated by newlines, with significant leading whitespace as in
@@ -72,7 +72,7 @@ source.
 
    .. audit-event:: cpython.run_command command cmdoption-c
 
-.. cmdoption:: -m <module-name>
+.. option:: -m <module-name>
 
    Search :data:`sys.path` for the named module and execute its contents as
    the :mod:`__main__` module.
@@ -188,35 +188,35 @@ automatically enabled, if available on your platform (see
 Generic options
 ~~~~~~~~~~~~~~~
 
-.. cmdoption:: -?
-               -h
-               --help
+.. option:: -?
+            -h
+            --help
 
    Print a short description of all command line options and corresponding
    environment variables and exit.
 
-.. cmdoption:: --help-env
+.. option:: --help-env
 
    Print a short description of Python-specific environment variables
    and exit.
 
    .. versionadded:: 3.11
 
-.. cmdoption:: --help-xoptions
+.. option:: --help-xoptions
 
    Print a description of implementation-specific :option:`-X` options
    and exit.
 
    .. versionadded:: 3.11
 
-.. cmdoption:: --help-all
+.. option:: --help-all
 
    Print complete usage information and exit.
 
    .. versionadded:: 3.11
 
-.. cmdoption:: -V
-               --version
+.. option:: -V
+            --version
 
    Print the Python version number and exit.  Example output could be:
 
@@ -240,7 +240,7 @@ Generic options
 Miscellaneous options
 ~~~~~~~~~~~~~~~~~~~~~
 
-.. cmdoption:: -b
+.. option:: -b
 
    Issue a warning when comparing :class:`bytes` or :class:`bytearray` with
    :class:`str` or :class:`bytes` with :class:`int`.  Issue an error when the
@@ -249,13 +249,13 @@ Miscellaneous options
    .. versionchanged:: 3.5
       Affects comparisons of :class:`bytes` with :class:`int`.
 
-.. cmdoption:: -B
+.. option:: -B
 
    If given, Python won't try to write ``.pyc`` files on the
    import of source modules.  See also :envvar:`PYTHONDONTWRITEBYTECODE`.
 
 
-.. cmdoption:: --check-hash-based-pycs default|always|never
+.. option:: --check-hash-based-pycs default|always|never
 
    Control the validation behavior of hash-based ``.pyc`` files. See
    :ref:`pyc-invalidation`. When set to ``default``, checked and unchecked
@@ -269,7 +269,7 @@ Miscellaneous options
    option.
 
 
-.. cmdoption:: -d
+.. option:: -d
 
    Turn on parser debugging output (for expert only).
    See also the :envvar:`PYTHONDEBUG` environment variable.
@@ -278,7 +278,7 @@ Miscellaneous options
    it's ignored.
 
 
-.. cmdoption:: -E
+.. option:: -E
 
    Ignore all :envvar:`PYTHON*` environment variables, e.g.
    :envvar:`PYTHONPATH` and :envvar:`PYTHONHOME`, that might be set.
@@ -286,7 +286,7 @@ Miscellaneous options
    See also the :option:`-P` and :option:`-I` (isolated) options.
 
 
-.. cmdoption:: -i
+.. option:: -i
 
    When a script is passed as first argument or the :option:`-c` option is used,
    enter interactive mode after executing the script or the command, even when
@@ -297,7 +297,7 @@ Miscellaneous options
    raises an exception.  See also :envvar:`PYTHONINSPECT`.
 
 
-.. cmdoption:: -I
+.. option:: -I
 
    Run Python in isolated mode. This also implies :option:`-E`, :option:`-P`
    and :option:`-s` options.
@@ -310,7 +310,7 @@ Miscellaneous options
    .. versionadded:: 3.4
 
 
-.. cmdoption:: -O
+.. option:: -O
 
    Remove assert statements and any code conditional on the value of
    :const:`__debug__`.  Augment the filename for compiled
@@ -321,7 +321,7 @@ Miscellaneous options
       Modify ``.pyc`` filenames according to :pep:`488`.
 
 
-.. cmdoption:: -OO
+.. option:: -OO
 
    Do :option:`-O` and also discard docstrings.  Augment the filename
    for compiled (:term:`bytecode`) files by adding ``.opt-2`` before the
@@ -331,7 +331,7 @@ Miscellaneous options
       Modify ``.pyc`` filenames according to :pep:`488`.
 
 
-.. cmdoption:: -P
+.. option:: -P
 
    Don't prepend a potentially unsafe path to :data:`sys.path`:
 
@@ -348,14 +348,14 @@ Miscellaneous options
    .. versionadded:: 3.11
 
 
-.. cmdoption:: -q
+.. option:: -q
 
    Don't display the copyright and version messages even in interactive mode.
 
    .. versionadded:: 3.2
 
 
-.. cmdoption:: -R
+.. option:: -R
 
    Turn on hash randomization. This option only has an effect if the
    :envvar:`PYTHONHASHSEED` environment variable is set to ``0``, since hash
@@ -381,7 +381,7 @@ Miscellaneous options
    .. versionadded:: 3.2.3
 
 
-.. cmdoption:: -s
+.. option:: -s
 
    Don't add the :data:`user site-packages directory <site.USER_SITE>` to
    :data:`sys.path`.
@@ -391,7 +391,7 @@ Miscellaneous options
       :pep:`370` -- Per user site-packages directory
 
 
-.. cmdoption:: -S
+.. option:: -S
 
    Disable the import of the module :mod:`site` and the site-dependent
    manipulations of :data:`sys.path` that it entails.  Also disable these
@@ -399,7 +399,7 @@ Miscellaneous options
    :func:`site.main` if you want them to be triggered).
 
 
-.. cmdoption:: -u
+.. option:: -u
 
    Force the stdout and stderr streams to be unbuffered.  This option has no
    effect on the stdin stream.
@@ -410,7 +410,7 @@ Miscellaneous options
       The text layer of the stdout and stderr streams now is unbuffered.
 
 
-.. cmdoption:: -v
+.. option:: -v
 
    Print a message each time a module is initialized, showing the place
    (filename or built-in module) from which it is loaded.  When given twice
@@ -425,7 +425,7 @@ Miscellaneous options
 
 
 .. _using-on-warnings:
-.. cmdoption:: -W arg
+.. option:: -W arg
 
    Warning control. Python's warning machinery by default prints warning
    messages to :data:`sys.stderr`.
@@ -484,13 +484,13 @@ Miscellaneous options
    details.
 
 
-.. cmdoption:: -x
+.. option:: -x
 
    Skip the first line of the source, allowing use of non-Unix forms of
    ``#!cmd``.  This is intended for a DOS specific hack only.
 
 
-.. cmdoption:: -X
+.. option:: -X
 
    Reserved for various implementation-specific options.  CPython currently
    defines the following possible values:
@@ -597,7 +597,7 @@ Miscellaneous options
 Options you shouldn't use
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 
-.. cmdoption:: -J
+.. option:: -J
 
    Reserved for use by Jython_.
 
@@ -811,7 +811,7 @@ conflict.
 
    Defines the :data:`user base directory <site.USER_BASE>`, which is used to
    compute the path of the :data:`user site-packages directory <site.USER_SITE>`
-   and installation paths for
+   and :ref:`installation paths <sysconfig-user-scheme>` for
    ``python -m pip install --user``.
 
    .. seealso::
diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst
index 83b4c7aa0481e9..5f9e695d10ad44 100644
--- a/Doc/using/configure.rst
+++ b/Doc/using/configure.rst
@@ -97,7 +97,7 @@ See also the :file:`Misc/SpecialBuilds.txt` in the Python source distribution.
 General Options
 ---------------
 
-.. cmdoption:: --enable-loadable-sqlite-extensions
+.. option:: --enable-loadable-sqlite-extensions
 
    Support loadable extensions in the :mod:`!_sqlite` extension module (default
    is no) of the :mod:`sqlite3` module.
@@ -107,12 +107,12 @@ General Options
 
    .. versionadded:: 3.6
 
-.. cmdoption:: --disable-ipv6
+.. option:: --disable-ipv6
 
    Disable IPv6 support (enabled by default if supported), see the
    :mod:`socket` module.
 
-.. cmdoption:: --enable-big-digits=[15|30]
+.. option:: --enable-big-digits=[15|30]
 
    Define the size in bits of Python :class:`int` digits: 15 or 30 bits.
 
@@ -122,7 +122,7 @@ General Options
 
    See :data:`sys.int_info.bits_per_digit <sys.int_info>`.
 
-.. cmdoption:: --with-suffix=SUFFIX
+.. option:: --with-suffix=SUFFIX
 
    Set the Python executable suffix to *SUFFIX*.
 
@@ -135,7 +135,7 @@ General Options
       The default suffix on WASM platform is one of ``.js``, ``.html``
       or ``.wasm``.
 
-.. cmdoption:: --with-tzpath=<list of absolute paths separated by pathsep>
+.. option:: --with-tzpath=<list of absolute paths separated by pathsep>
 
    Select the default time zone search path for :const:`zoneinfo.TZPATH`.
    See the :ref:`Compile-time configuration
@@ -147,7 +147,7 @@ General Options
 
    .. versionadded:: 3.9
 
-.. cmdoption:: --without-decimal-contextvar
+.. option:: --without-decimal-contextvar
 
    Build the ``_decimal`` extension module using a thread-local context rather
    than a coroutine-local context (default), see the :mod:`decimal` module.
@@ -156,7 +156,7 @@ General Options
 
    .. versionadded:: 3.9
 
-.. cmdoption:: --with-dbmliborder=<list of backend names>
+.. option:: --with-dbmliborder=<list of backend names>
 
    Override order to check db backends for the :mod:`dbm` module
 
@@ -166,7 +166,7 @@ General Options
    * ``gdbm``;
    * ``bdb``.
 
-.. cmdoption:: --without-c-locale-coercion
+.. option:: --without-c-locale-coercion
 
    Disable C locale coercion to a UTF-8 based locale (enabled by default).
 
@@ -174,13 +174,13 @@ General Options
 
    See :envvar:`PYTHONCOERCECLOCALE` and the :pep:`538`.
 
-.. cmdoption:: --without-freelists
+.. option:: --without-freelists
 
    Disable all freelists except the empty tuple singleton.
 
    .. versionadded:: 3.11
 
-.. cmdoption:: --with-platlibdir=DIRNAME
+.. option:: --with-platlibdir=DIRNAME
 
    Python library directory name (default is ``lib``).
 
@@ -190,7 +190,7 @@ General Options
 
    .. versionadded:: 3.9
 
-.. cmdoption:: --with-wheel-pkg-dir=PATH
+.. option:: --with-wheel-pkg-dir=PATH
 
    Directory of wheel packages used by the :mod:`ensurepip` module
    (none by default).
@@ -202,7 +202,7 @@ General Options
 
    .. versionadded:: 3.10
 
-.. cmdoption:: --with-pkg-config=[check|yes|no]
+.. option:: --with-pkg-config=[check|yes|no]
 
    Whether configure should use :program:`pkg-config` to detect build
    dependencies.
@@ -213,7 +213,7 @@ General Options
 
    .. versionadded:: 3.11
 
-.. cmdoption:: --enable-pystats
+.. option:: --enable-pystats
 
    Turn on internal Python performance statistics gathering.
 
@@ -280,7 +280,7 @@ General Options
 
    .. versionadded:: 3.11
 
-.. cmdoption:: --disable-gil
+.. option:: --disable-gil
 
    Enables **experimental** support for running Python without the
    :term:`global interpreter lock` (GIL).
@@ -289,12 +289,12 @@ General Options
 
    .. versionadded:: 3.13
 
-.. cmdoption:: PKG_CONFIG
+.. option:: PKG_CONFIG
 
    Path to ``pkg-config`` utility.
 
-.. cmdoption:: PKG_CONFIG_LIBDIR
-.. cmdoption:: PKG_CONFIG_PATH
+.. option:: PKG_CONFIG_LIBDIR
+.. option:: PKG_CONFIG_PATH
 
    ``pkg-config`` options.
 
@@ -302,19 +302,19 @@ General Options
 C compiler options
 ------------------
 
-.. cmdoption:: CC
+.. option:: CC
 
    C compiler command.
 
-.. cmdoption:: CFLAGS
+.. option:: CFLAGS
 
    C compiler flags.
 
-.. cmdoption:: CPP
+.. option:: CPP
 
    C preprocessor command.
 
-.. cmdoption:: CPPFLAGS
+.. option:: CPPFLAGS
 
    C preprocessor flags, e.g. :samp:`-I{include_dir}`.
 
@@ -322,15 +322,15 @@ C compiler options
 Linker options
 --------------
 
-.. cmdoption:: LDFLAGS
+.. option:: LDFLAGS
 
    Linker flags, e.g. :samp:`-L{library_directory}`.
 
-.. cmdoption:: LIBS
+.. option:: LIBS
 
    Libraries to pass to the linker, e.g. :samp:`-l{library}`.
 
-.. cmdoption:: MACHDEP
+.. option:: MACHDEP
 
    Name for machine-dependent library files.
 
@@ -340,80 +340,80 @@ Options for third-party dependencies
 
 .. versionadded:: 3.11
 
-.. cmdoption:: BZIP2_CFLAGS
-.. cmdoption:: BZIP2_LIBS
+.. option:: BZIP2_CFLAGS
+.. option:: BZIP2_LIBS
 
    C compiler and linker flags to link Python to ``libbz2``, used by :mod:`bz2`
    module, overriding ``pkg-config``.
 
-.. cmdoption:: CURSES_CFLAGS
-.. cmdoption:: CURSES_LIBS
+.. option:: CURSES_CFLAGS
+.. option:: CURSES_LIBS
 
    C compiler and linker flags for ``libncurses`` or ``libncursesw``, used by
    :mod:`curses` module, overriding ``pkg-config``.
 
-.. cmdoption:: GDBM_CFLAGS
-.. cmdoption:: GDBM_LIBS
+.. option:: GDBM_CFLAGS
+.. option:: GDBM_LIBS
 
    C compiler and linker flags for ``gdbm``.
 
-.. cmdoption:: LIBB2_CFLAGS
-.. cmdoption:: LIBB2_LIBS
+.. option:: LIBB2_CFLAGS
+.. option:: LIBB2_LIBS
 
    C compiler and linker flags for ``libb2`` (:ref:`BLAKE2 <hashlib-blake2>`),
    used by :mod:`hashlib` module, overriding ``pkg-config``.
 
-.. cmdoption:: LIBEDIT_CFLAGS
-.. cmdoption:: LIBEDIT_LIBS
+.. option:: LIBEDIT_CFLAGS
+.. option:: LIBEDIT_LIBS
 
    C compiler and linker flags for ``libedit``, used by :mod:`readline` module,
    overriding ``pkg-config``.
 
-.. cmdoption:: LIBFFI_CFLAGS
-.. cmdoption:: LIBFFI_LIBS
+.. option:: LIBFFI_CFLAGS
+.. option:: LIBFFI_LIBS
 
    C compiler and linker flags for ``libffi``, used by :mod:`ctypes` module,
    overriding ``pkg-config``.
 
-.. cmdoption:: LIBLZMA_CFLAGS
-.. cmdoption:: LIBLZMA_LIBS
+.. option:: LIBLZMA_CFLAGS
+.. option:: LIBLZMA_LIBS
 
    C compiler and linker flags for ``liblzma``, used by :mod:`lzma` module,
    overriding ``pkg-config``.
 
-.. cmdoption:: LIBREADLINE_CFLAGS
-.. cmdoption:: LIBREADLINE_LIBS
+.. option:: LIBREADLINE_CFLAGS
+.. option:: LIBREADLINE_LIBS
 
    C compiler and linker flags for ``libreadline``, used by :mod:`readline`
    module, overriding ``pkg-config``.
 
-.. cmdoption:: LIBSQLITE3_CFLAGS
-.. cmdoption:: LIBSQLITE3_LIBS
+.. option:: LIBSQLITE3_CFLAGS
+.. option:: LIBSQLITE3_LIBS
 
    C compiler and linker flags for ``libsqlite3``, used by :mod:`sqlite3`
    module, overriding ``pkg-config``.
 
-.. cmdoption:: LIBUUID_CFLAGS
-.. cmdoption:: LIBUUID_LIBS
+.. option:: LIBUUID_CFLAGS
+.. option:: LIBUUID_LIBS
 
    C compiler and linker flags for ``libuuid``, used by :mod:`uuid` module,
    overriding ``pkg-config``.
 
-.. cmdoption:: PANEL_CFLAGS
-.. cmdoption:: PANEL_LIBS
+.. option:: PANEL_CFLAGS
+.. option:: PANEL_LIBS
 
    C compiler and Linker flags for PANEL, overriding ``pkg-config``.
 
    C compiler and linker flags for ``libpanel`` or ``libpanelw``, used by
    :mod:`curses.panel` module, overriding ``pkg-config``.
 
-.. cmdoption:: TCLTK_CFLAGS
-.. cmdoption:: TCLTK_LIBS
+.. option:: TCLTK_CFLAGS
+.. option:: TCLTK_LIBS
 
    C compiler and linker flags for TCLTK, overriding ``pkg-config``.
 
-.. cmdoption:: ZLIB_CFLAGS
-.. cmdoption:: ZLIB_LIBS
+.. option:: ZLIB_CFLAGS
+.. option:: ZLIB_LIBS
 
    C compiler and linker flags for ``libzlib``, used by :mod:`gzip` module,
    overriding ``pkg-config``.
@@ -422,7 +422,7 @@ Options for third-party dependencies
 WebAssembly Options
 -------------------
 
-.. cmdoption:: --with-emscripten-target=[browser|node]
+.. option:: --with-emscripten-target=[browser|node]
 
    Set build flavor for ``wasm32-emscripten``.
 
@@ -431,7 +431,7 @@ WebAssembly Options
 
    .. versionadded:: 3.11
 
-.. cmdoption:: --enable-wasm-dynamic-linking
+.. option:: --enable-wasm-dynamic-linking
 
    Turn on dynamic linking support for WASM.
 
@@ -440,7 +440,7 @@ WebAssembly Options
 
    .. versionadded:: 3.11
 
-.. cmdoption:: --enable-wasm-pthreads
+.. option:: --enable-wasm-pthreads
 
    Turn on pthreads support for WASM.
 
@@ -450,7 +450,7 @@ WebAssembly Options
 Install Options
 ---------------
 
-.. cmdoption:: --prefix=PREFIX
+.. option:: --prefix=PREFIX
 
    Install architecture-independent files in PREFIX. On Unix, it
    defaults to :file:`/usr/local`.
@@ -460,20 +460,20 @@ Install Options
    As an example, one can use ``--prefix="$HOME/.local/"`` to install
    a Python in its home directory.
 
-.. cmdoption:: --exec-prefix=EPREFIX
+.. option:: --exec-prefix=EPREFIX
 
    Install architecture-dependent files in EPREFIX, defaults to :option:`--prefix`.
 
    This value can be retrieved at runtime using :data:`sys.exec_prefix`.
 
-.. cmdoption:: --disable-test-modules
+.. option:: --disable-test-modules
 
    Don't build nor install test modules, like the :mod:`test` package or the
    :mod:`!_testcapi` extension module (built and installed by default).
 
    .. versionadded:: 3.10
 
-.. cmdoption:: --with-ensurepip=[upgrade|install|no]
+.. option:: --with-ensurepip=[upgrade|install|no]
 
    Select the :mod:`ensurepip` command run on Python installation:
 
@@ -492,7 +492,7 @@ Configuring Python using ``--enable-optimizations --with-lto`` (PGO + LTO) is
 recommended for best performance. The experimental ``--enable-bolt`` flag can
 also be used to improve performance.
 
-.. cmdoption:: --enable-optimizations
+.. option:: --enable-optimizations
 
    Enable Profile Guided Optimization (PGO) using :envvar:`PROFILE_TASK`
    (disabled by default).
@@ -518,7 +518,10 @@ also be used to improve performance.
 
    .. versionadded:: 3.8
 
-.. cmdoption:: --with-lto=[full|thin|no|yes]
+   .. versionchanged:: 3.13
+      Task failure is no longer ignored silently.
+
+.. option:: --with-lto=[full|thin|no|yes]
 
    Enable Link Time Optimization (LTO) in any build (disabled by default).
 
@@ -533,7 +536,7 @@ also be used to improve performance.
    .. versionchanged:: 3.12
       Use ThinLTO as the default optimization policy on Clang if the compiler accepts the flag.
 
-.. cmdoption:: --enable-bolt
+.. option:: --enable-bolt
 
    Enable usage of the `BOLT post-link binary optimizer
    <https://github.com/llvm/llvm-project/tree/main/bolt>`_ (disabled by
@@ -558,32 +561,32 @@ also be used to improve performance.
 
    .. versionadded:: 3.12
 
-.. cmdoption:: BOLT_APPLY_FLAGS
+.. option:: BOLT_APPLY_FLAGS
 
    Arguments to ``llvm-bolt`` when creating a `BOLT optimized binary
    <https://github.com/facebookarchive/BOLT>`_.
 
    .. versionadded:: 3.12
 
-.. cmdoption:: BOLT_INSTRUMENT_FLAGS
+.. option:: BOLT_INSTRUMENT_FLAGS
 
    Arguments to ``llvm-bolt`` when instrumenting binaries.
 
    .. versionadded:: 3.12
 
-.. cmdoption:: --with-computed-gotos
+.. option:: --with-computed-gotos
 
    Enable computed gotos in evaluation loop (enabled by default on supported
    compilers).
 
-.. cmdoption:: --without-pymalloc
+.. option:: --without-pymalloc
 
    Disable the specialized Python memory allocator :ref:`pymalloc <pymalloc>`
    (enabled by default).
 
    See also :envvar:`PYTHONMALLOC` environment variable.
 
-.. cmdoption:: --without-doc-strings
+.. option:: --without-doc-strings
 
    Disable static documentation strings to reduce the memory footprint (enabled
    by default). Documentation strings defined in Python are not affected.
@@ -592,11 +595,11 @@ also be used to improve performance.
 
    See the ``PyDoc_STRVAR()`` macro.
 
-.. cmdoption:: --enable-profiling
+.. option:: --enable-profiling
 
    Enable C-level code profiling with ``gprof`` (disabled by default).
 
-.. cmdoption:: --with-strict-overflow
+.. option:: --with-strict-overflow
 
    Add ``-fstrict-overflow`` to the C compiler flags (by default we add
    ``-fno-strict-overflow`` instead).
@@ -652,12 +655,12 @@ See also the :ref:`Python Development Mode <devmode>` and the
 Debug options
 -------------
 
-.. cmdoption:: --with-pydebug
+.. option:: --with-pydebug
 
    :ref:`Build Python in debug mode <debug-build>`: define the ``Py_DEBUG``
    macro (disabled by default).
 
-.. cmdoption:: --with-trace-refs
+.. option:: --with-trace-refs
 
    Enable tracing references for debugging purpose (disabled by default).
 
@@ -678,7 +681,7 @@ Debug options
 
    .. versionadded:: 3.8
 
-.. cmdoption:: --with-assertions
+.. option:: --with-assertions
 
    Build with C assertions enabled (default is no): ``assert(...);`` and
    ``_PyObject_ASSERT(...);``.
@@ -691,11 +694,11 @@ Debug options
 
    .. versionadded:: 3.6
 
-.. cmdoption:: --with-valgrind
+.. option:: --with-valgrind
 
    Enable Valgrind support (default is no).
 
-.. cmdoption:: --with-dtrace
+.. option:: --with-dtrace
 
    Enable DTrace support (default is no).
 
@@ -704,19 +707,19 @@ Debug options
 
    .. versionadded:: 3.6
 
-.. cmdoption:: --with-address-sanitizer
+.. option:: --with-address-sanitizer
 
    Enable AddressSanitizer memory error detector, ``asan`` (default is no).
 
    .. versionadded:: 3.6
 
-.. cmdoption:: --with-memory-sanitizer
+.. option:: --with-memory-sanitizer
 
    Enable MemorySanitizer allocation error detector, ``msan`` (default is no).
 
    .. versionadded:: 3.6
 
-.. cmdoption:: --with-undefined-behavior-sanitizer
+.. option:: --with-undefined-behavior-sanitizer
 
    Enable UndefinedBehaviorSanitizer undefined behaviour detector, ``ubsan``
    (default is no).
@@ -727,11 +730,11 @@ Debug options
 Linker options
 --------------
 
-.. cmdoption:: --enable-shared
+.. option:: --enable-shared
 
    Enable building a shared Python library: ``libpython`` (default is no).
 
-.. cmdoption:: --without-static-libpython
+.. option:: --without-static-libpython
 
    Do not build ``libpythonMAJOR.MINOR.a`` and do not install ``python.o``
    (built and enabled by default).
@@ -742,23 +745,23 @@ Linker options
 Libraries options
 -----------------
 
-.. cmdoption:: --with-libs='lib1 ...'
+.. option:: --with-libs='lib1 ...'
 
    Link against additional libraries (default is no).
 
-.. cmdoption:: --with-system-expat
+.. option:: --with-system-expat
 
    Build the :mod:`!pyexpat` module using an installed ``expat`` library
    (default is no).
 
-.. cmdoption:: --with-system-libmpdec
+.. option:: --with-system-libmpdec
 
    Build the ``_decimal`` extension module using an installed ``mpdec``
    library, see the :mod:`decimal` module (default is no).
 
    .. versionadded:: 3.3
 
-.. cmdoption:: --with-readline=readline|editline
+.. option:: --with-readline=readline|editline
 
    Designate a backend library for the :mod:`readline` module.
 
@@ -767,7 +770,7 @@ Libraries options
 
    .. versionadded:: 3.10
 
-.. cmdoption:: --without-readline
+.. option:: --without-readline
 
    Don't build the :mod:`readline` module (built by default).
 
@@ -775,21 +778,21 @@ Libraries options
 
    .. versionadded:: 3.10
 
-.. cmdoption:: --with-libm=STRING
+.. option:: --with-libm=STRING
 
    Override ``libm`` math library to *STRING* (default is system-dependent).
 
-.. cmdoption:: --with-libc=STRING
+.. option:: --with-libc=STRING
 
    Override ``libc`` C library to *STRING* (default is system-dependent).
 
-.. cmdoption:: --with-openssl=DIR
+.. option:: --with-openssl=DIR
 
    Root of the OpenSSL directory.
 
    .. versionadded:: 3.7
 
-.. cmdoption:: --with-openssl-rpath=[no|auto|DIR]
+.. option:: --with-openssl-rpath=[no|auto|DIR]
 
    Set runtime library directory (rpath) for OpenSSL libraries:
 
@@ -804,7 +807,7 @@ Libraries options
 Security Options
 ----------------
 
-.. cmdoption:: --with-hash-algorithm=[fnv|siphash13|siphash24]
+.. option:: --with-hash-algorithm=[fnv|siphash13|siphash24]
 
    Select hash algorithm for use in ``Python/pyhash.c``:
 
@@ -817,7 +820,7 @@ Security Options
    .. versionadded:: 3.11
       ``siphash13`` is added and it is the new default.
 
-.. cmdoption:: --with-builtin-hashlib-hashes=md5,sha1,sha256,sha512,sha3,blake2
+.. option:: --with-builtin-hashlib-hashes=md5,sha1,sha256,sha512,sha3,blake2
 
    Built-in hash modules:
 
@@ -830,7 +833,7 @@ Security Options
 
    .. versionadded:: 3.9
 
-.. cmdoption:: --with-ssl-default-suites=[python|openssl|STRING]
+.. option:: --with-ssl-default-suites=[python|openssl|STRING]
 
    Override the OpenSSL default cipher suites string:
 
@@ -852,19 +855,19 @@ macOS Options
 
 See ``Mac/README.rst``.
 
-.. cmdoption:: --enable-universalsdk
-.. cmdoption:: --enable-universalsdk=SDKDIR
+.. option:: --enable-universalsdk
+.. option:: --enable-universalsdk=SDKDIR
 
    Create a universal binary build. *SDKDIR* specifies which macOS SDK should
    be used to perform the build (default is no).
 
-.. cmdoption:: --enable-framework
-.. cmdoption:: --enable-framework=INSTALLDIR
+.. option:: --enable-framework
+.. option:: --enable-framework=INSTALLDIR
 
    Create a Python.framework rather than a traditional Unix install. Optional
    *INSTALLDIR* specifies the installation path (default is no).
 
-.. cmdoption:: --with-universal-archs=ARCH
+.. option:: --with-universal-archs=ARCH
 
    Specify the kind of universal binary that should be created. This option is
    only valid when :option:`--enable-universalsdk` is set.
@@ -880,7 +883,7 @@ See ``Mac/README.rst``.
    * ``intel-64``;
    * ``all``.
 
-.. cmdoption:: --with-framework-name=FRAMEWORK
+.. option:: --with-framework-name=FRAMEWORK
 
    Specify the name for the python framework on macOS only valid when
    :option:`--enable-framework` is set (default: ``Python``).
@@ -894,21 +897,21 @@ for another CPU architecture or platform. Cross compiling requires a Python
 interpreter for the build platform. The version of the build Python must match
 the version of the cross compiled host Python.
 
-.. cmdoption:: --build=BUILD
+.. option:: --build=BUILD
 
    configure for building on BUILD, usually guessed by :program:`config.guess`.
 
-.. cmdoption:: --host=HOST
+.. option:: --host=HOST
 
    cross-compile to build programs to run on HOST (target platform)
 
-.. cmdoption:: --with-build-python=path/to/python
+.. option:: --with-build-python=path/to/python
 
    path to build ``python`` binary for cross compiling
 
    .. versionadded:: 3.11
 
-.. cmdoption:: CONFIG_SITE=file
+.. option:: CONFIG_SITE=file
 
    An environment variable that points to a file with configure overrides.
 
@@ -919,7 +922,7 @@ the version of the cross compiled host Python.
       ac_cv_file__dev_ptmx=yes
       ac_cv_file__dev_ptc=no
 
-.. cmdoption:: HOSTRUNNER
+.. option:: HOSTRUNNER
 
    Program to run CPython for the host platform for cross-compilation.
 
diff --git a/Doc/using/windows.rst b/Doc/using/windows.rst
index 2476e60a26d485..51afba9265d055 100644
--- a/Doc/using/windows.rst
+++ b/Doc/using/windows.rst
@@ -867,17 +867,18 @@ For example, if the first line of your script starts with
 
   #! /usr/bin/python
 
-The default Python will be located and used.  As many Python scripts written
-to work on Unix will already have this line, you should find these scripts can
-be used by the launcher without modification.  If you are writing a new script
-on Windows which you hope will be useful on Unix, you should use one of the
-shebang lines starting with ``/usr``.
+The default Python or an active virtual environment will be located and used.
+As many Python scripts written to work on Unix will already have this line,
+you should find these scripts can be used by the launcher without modification.
+If you are writing a new script on Windows which you hope will be useful on
+Unix, you should use one of the shebang lines starting with ``/usr``.
 
 Any of the above virtual commands can be suffixed with an explicit version
 (either just the major version, or the major and minor version).
 Furthermore the 32-bit version can be requested by adding "-32" after the
 minor version. I.e. ``/usr/bin/python3.7-32`` will request usage of the
-32-bit python 3.7.
+32-bit Python 3.7. If a virtual environment is active, the version will be
+ignored and the environment will be used.
 
 .. versionadded:: 3.7
 
@@ -891,6 +892,13 @@ minor version. I.e. ``/usr/bin/python3.7-32`` will request usage of the
    not provably i386/32-bit". To request a specific environment, use the new
    :samp:`-V:{TAG}` argument with the complete tag.
 
+.. versionchanged:: 3.13
+
+   Virtual commands referencing ``python`` now prefer an active virtual
+   environment rather than searching :envvar:`PATH`. This handles cases where
+   the shebang specifies ``/usr/bin/env python3`` but :file:`python3.exe` is
+   not present in the active environment.
+
 The ``/usr/bin/env`` form of shebang line has one further special property.
 Before looking for installed Python interpreters, this form will search the
 executable :envvar:`PATH` for a Python executable matching the name provided
diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst
index ec39616d7c9d2b..78ebe6e8e2c166 100644
--- a/Doc/whatsnew/3.12.rst
+++ b/Doc/whatsnew/3.12.rst
@@ -46,7 +46,7 @@
    researching a change.
 
 This article explains the new features in Python 3.12, compared to 3.11.
-Python 3.12 will be released on October 2, 2023.
+Python 3.12 was released on October 2, 2023.
 For full details, see the :ref:`changelog <changelog>`.
 
 .. seealso::
@@ -121,7 +121,7 @@ Significant improvements in the standard library:
 * A :ref:`command-line interface <uuid-cli>` has been added to the
   :mod:`uuid` module
 * Due to the changes in :ref:`PEP 701 <whatsnew312-pep701>`,
-  producing tokens via the :mod:`tokenize` module is up to up to 64% faster.
+  producing tokens via the :mod:`tokenize` module is up to 64% faster.
 
 Security improvements:
 
@@ -303,7 +303,7 @@ Let's cover these in detail:
 See :pep:`701` for more details.
 
 As a positive side-effect of how this feature has been implemented (by parsing f-strings
-with :pep:`the PEG parser <617>`, now error messages for f-strings are more precise
+with :pep:`the PEG parser <617>`), now error messages for f-strings are more precise
 and include the exact location of the error. For example, in Python 3.11, the following
 f-string raises a :exc:`SyntaxError`:
 
@@ -2123,7 +2123,7 @@ Porting to Python 3.12
   The use of ``tp_dictoffset`` and ``tp_weaklistoffset`` is still
   supported, but does not fully support multiple inheritance
   (:gh:`95589`), and performance may be worse.
-  Classes declaring :c:macro:`Py_TPFLAGS_MANAGED_DICT` should call
+  Classes declaring :c:macro:`Py_TPFLAGS_MANAGED_DICT` must call
   :c:func:`!_PyObject_VisitManagedDict` and :c:func:`!_PyObject_ClearManagedDict`
   to traverse and clear their instance's dictionaries.
   To clear weakrefs, call :c:func:`PyObject_ClearWeakRefs`, as before.
diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst
index c9e6ca8bf88866..7a62963203e164 100644
--- a/Doc/whatsnew/3.13.rst
+++ b/Doc/whatsnew/3.13.rst
@@ -91,6 +91,16 @@ Other Language Changes
   of the ``optimize`` argument.
   (Contributed by Irit Katriel in :gh:`108113`).
 
+* :mod:`multiprocessing`, :mod:`concurrent.futures`, :mod:`compileall`:
+  Replace :func:`os.cpu_count` with :func:`os.process_cpu_count` to select the
+  default number of worker threads and processes. This takes the CPU
+  affinity of the process into account, if supported.
+  (Contributed by Victor Stinner in :gh:`109649`.)
+
+* :func:`os.path.realpath` now resolves MS-DOS style file names even if
+  the file is not accessible.
+  (Contributed by Moonsik Park in :gh:`82367`.)
+
 New Modules
 ===========
 
@@ -163,6 +173,13 @@ opcode
   documented or exposed through ``dis``, and were not intended to be
   used externally.
 
+os
+--
+
+* Add :func:`os.process_cpu_count` function to get the number of logical CPUs
+  usable by the calling thread of the current process.
+  (Contributed by Victor Stinner in :gh:`109649`.)
+
 pathlib
 -------
 
@@ -170,6 +187,10 @@ pathlib
   :exc:`NotImplementedError` when a path operation isn't supported.
   (Contributed by Barney Gale in :gh:`89812`.)
 
+* Add :meth:`pathlib.Path.from_uri`, a new constructor to create a :class:`pathlib.Path`
+  object from a 'file' URI (``file:/``).
+  (Contributed by Barney Gale in :gh:`107465`.)
+
 * Add support for recursive wildcards in :meth:`pathlib.PurePath.match`.
   (Contributed by Barney Gale in :gh:`73435`.)
 
@@ -977,6 +998,21 @@ New Features
   references) now supports the :ref:`Limited API <limited-c-api>`.
   (Contributed by Victor Stinner in :gh:`108634`.)
 
+* Add :c:func:`PyObject_VisitManagedDict` and
+  :c:func:`PyObject_ClearManagedDict` functions which must be called by the
+  traverse and clear functions of a type using the
+  :c:macro:`Py_TPFLAGS_MANAGED_DICT` flag.  The `pythoncapi-compat project
+  <https://github.com/python/pythoncapi-compat/>`__ can be used to get these
+  functions on Python 3.11 and 3.12.
+  (Contributed by Victor Stinner in :gh:`107073`.)
+
+* Add :c:func:`PyThreadState_GetUnchecked()` function: similar to
+  :c:func:`PyThreadState_Get()`, but don't kill the process with a fatal error
+  if it is NULL. The caller must check whether the result is NULL.
+  Previously, the function was private and known as
+  ``_PyThreadState_UncheckedGet()``.
+  (Contributed by Victor Stinner in :gh:`108867`.)
+
 Porting to Python 3.13
 ----------------------
 
@@ -988,7 +1024,12 @@ Porting to Python 3.13
 
 * ``Python.h`` no longer includes the ``<unistd.h>`` standard header file. If
   needed, it should now be included explicitly. For example, it provides the
-  functions: ``close()``, ``getpagesize()``, ``getpid()`` and ``sysconf()``.
+  functions: ``read()``, ``write()``, ``close()``, ``isatty()``, ``lseek()``,
+  ``getpid()``, ``getcwd()``, ``sysconf()`` and ``getpagesize()``.
+  As a consequence, ``_POSIX_SEMAPHORES`` and ``_POSIX_THREADS`` macros are no
+  longer defined by ``Python.h``. The ``HAVE_UNISTD_H`` and ``HAVE_PTHREAD_H``
+  macros defined by ``Python.h`` can be used to decide if ``<unistd.h>`` and
+  ``<pthread.h>`` header files can be included.
   (Contributed by Victor Stinner in :gh:`108765`.)
 
 * ``Python.h`` no longer includes these standard header files: ``<time.h>``,
@@ -1267,3 +1308,6 @@ removed, although there is currently no date scheduled for their removal.
   * :c:func:`PyThread_get_key_value`: use :c:func:`PyThread_tss_get`.
   * :c:func:`PyThread_delete_key_value`: use :c:func:`PyThread_tss_delete`.
   * :c:func:`PyThread_ReInitTLS`: no longer needed.
+
+* Remove undocumented ``PY_TIMEOUT_MAX`` constant from the limited C API.
+  (Contributed by Victor Stinner in :gh:`110014`.)
diff --git a/Include/cpython/code.h b/Include/cpython/code.h
index 45b09a1265df80..cf715c55a2b3b8 100644
--- a/Include/cpython/code.h
+++ b/Include/cpython/code.h
@@ -167,7 +167,7 @@ typedef struct {
     PyObject *co_weakreflist;     /* to support weakrefs to code objects */    \
     _PyExecutorArray *co_executors;      /* executors from optimizer */        \
     _PyCoCached *_co_cached;      /* cached co_* attributes */                 \
-    uint64_t _co_instrumentation_version; /* current instrumentation version */  \
+    uintptr_t _co_instrumentation_version; /* current instrumentation version */ \
     _PyCoMonitoringData *_co_monitoring; /* Monitoring data */                 \
     int _co_firsttraceable;       /* index of first traceable instruction */   \
     /* Scratch space for extra data relating to the code object.               \
diff --git a/Include/cpython/initconfig.h b/Include/cpython/initconfig.h
index ee130467824daa..5d7b4e2d929e5b 100644
--- a/Include/cpython/initconfig.h
+++ b/Include/cpython/initconfig.h
@@ -204,6 +204,9 @@ typedef struct PyConfig {
     wchar_t *run_module;
     wchar_t *run_filename;
 
+    /* --- Set by Py_Main() -------------------------- */
+    wchar_t *sys_path_0;
+
     /* --- Private fields ---------------------------- */
 
     // Install importlib? If equals to 0, importlib is not initialized at all.
diff --git a/Include/cpython/object.h b/Include/cpython/object.h
index e5987191cfe08c..ede394d9673d7e 100644
--- a/Include/cpython/object.h
+++ b/Include/cpython/object.h
@@ -425,7 +425,7 @@ PyAPI_FUNC(int) _PyTrash_cond(PyObject *op, destructor dealloc);
         /* If "cond" is false, then _tstate remains NULL and the deallocator \
          * is run normally without involving the trashcan */ \
         if (cond) { \
-            _tstate = _PyThreadState_UncheckedGet(); \
+            _tstate = PyThreadState_GetUnchecked(); \
             if (_PyTrash_begin(_tstate, _PyObject_CAST(op))) { \
                 break; \
             } \
@@ -444,8 +444,8 @@ PyAPI_FUNC(int) _PyTrash_cond(PyObject *op, destructor dealloc);
 
 PyAPI_FUNC(void *) PyObject_GetItemData(PyObject *obj);
 
-PyAPI_FUNC(int) _PyObject_VisitManagedDict(PyObject *obj, visitproc visit, void *arg);
-PyAPI_FUNC(void) _PyObject_ClearManagedDict(PyObject *obj);
+PyAPI_FUNC(int) PyObject_VisitManagedDict(PyObject *obj, visitproc visit, void *arg);
+PyAPI_FUNC(void) PyObject_ClearManagedDict(PyObject *obj);
 
 #define TYPE_MAX_WATCHERS 8
 
diff --git a/Include/cpython/pthread_stubs.h b/Include/cpython/pthread_stubs.h
index d95ee03d8308ce..5246968ea05476 100644
--- a/Include/cpython/pthread_stubs.h
+++ b/Include/cpython/pthread_stubs.h
@@ -21,13 +21,29 @@
 #ifdef __wasi__
 // WASI's bits/alltypes.h provides type definitions when __NEED_ is set.
 // The header file can be included multiple times.
-#  define __NEED_pthread_cond_t 1
-#  define __NEED_pthread_condattr_t 1
-#  define __NEED_pthread_mutex_t 1
-#  define __NEED_pthread_mutexattr_t 1
-#  define __NEED_pthread_key_t 1
-#  define __NEED_pthread_t 1
-#  define __NEED_pthread_attr_t 1
+//
+// <sys/types.h> may also define these macros.
+#  ifndef __NEED_pthread_cond_t
+#    define __NEED_pthread_cond_t 1
+#  endif
+#  ifndef __NEED_pthread_condattr_t
+#    define __NEED_pthread_condattr_t 1
+#  endif
+#  ifndef __NEED_pthread_mutex_t
+#    define __NEED_pthread_mutex_t 1
+#  endif
+#  ifndef __NEED_pthread_mutexattr_t
+#    define __NEED_pthread_mutexattr_t 1
+#  endif
+#  ifndef __NEED_pthread_key_t
+#    define __NEED_pthread_key_t 1
+#  endif
+#  ifndef __NEED_pthread_t
+#    define __NEED_pthread_t 1
+#  endif
+#  ifndef __NEED_pthread_attr_t
+#    define __NEED_pthread_attr_t 1
+#  endif
 #  include <bits/alltypes.h>
 #else
 typedef struct { void *__x; } pthread_cond_t;
diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h
index 5e184d0ca0944b..40102f8855090e 100644
--- a/Include/cpython/pystate.h
+++ b/Include/cpython/pystate.h
@@ -92,6 +92,19 @@ struct _ts {
         /* padding to align to 4 bytes */
         unsigned int :24;
     } _status;
+#ifdef Py_BUILD_CORE  /* presumably the values stored in `_whence` below -- confirm */
+#  define _PyThreadState_WHENCE_NOTSET (-1)  /* parenthesized: negative literal */
+#  define _PyThreadState_WHENCE_UNKNOWN 0
+#  define _PyThreadState_WHENCE_INTERP 1
+#  define _PyThreadState_WHENCE_THREADING 2
+#  define _PyThreadState_WHENCE_GILSTATE 3
+#  define _PyThreadState_WHENCE_EXEC 4
+#endif
+    int _whence;
+
+    /* Thread state (_Py_THREAD_ATTACHED, _Py_THREAD_DETACHED, _Py_THREAD_GC).
+       See Include/internal/pycore_pystate.h for more details. */
+    int state;
 
     int py_recursion_remaining;
     int py_recursion_limit;
@@ -209,7 +222,8 @@ struct _ts {
 
 /* Similar to PyThreadState_Get(), but don't issue a fatal error
  * if it is NULL. */
-PyAPI_FUNC(PyThreadState *) _PyThreadState_UncheckedGet(void);
+PyAPI_FUNC(PyThreadState *) PyThreadState_GetUnchecked(void);
+
 
 // Disable tracing and profiling.
 PyAPI_FUNC(void) PyThreadState_EnterTracing(PyThreadState *tstate);
diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h
index 150e16faa96ca1..4988caa803723d 100644
--- a/Include/cpython/pystats.h
+++ b/Include/cpython/pystats.h
@@ -86,10 +86,6 @@ typedef struct _object_stats {
     uint64_t type_cache_dunder_hits;
     uint64_t type_cache_dunder_misses;
     uint64_t type_cache_collisions;
-    uint64_t optimization_attempts;
-    uint64_t optimization_traces_created;
-    uint64_t optimization_traces_executed;
-    uint64_t optimization_uops_executed;
     /* Temporary value used during GC */
     uint64_t object_visits;
 } ObjectStats;
@@ -100,10 +96,35 @@ typedef struct _gc_stats {
     uint64_t objects_collected;
 } GCStats;
 
+typedef struct _uop_stats {
+    uint64_t execution_count;  /* incremented by UOP_EXE_INC() (pycore_code.h) */
+} UOpStats;
+
+#define _Py_UOP_HIST_SIZE 32
+
+typedef struct _optimization_stats {  /* updated through the OPT_*/UOP_* macros in pycore_code.h */
+    uint64_t attempts;  /* scalar counters: can be bumped with OPT_STAT_INC(<field>) */
+    uint64_t traces_created;
+    uint64_t traces_executed;
+    uint64_t uops_executed;
+    uint64_t trace_stack_overflow;
+    uint64_t trace_stack_underflow;
+    uint64_t trace_too_long;
+    uint64_t trace_too_short;
+    uint64_t inner_loop;
+    uint64_t recursive_call;
+    UOpStats opcode[512];  /* indexed by the `opname` given to UOP_EXE_INC() */
+    uint64_t unsupported_opcode[256];  /* indexed by the `opname` given to OPT_UNSUPPORTED_OPCODE() */
+    uint64_t trace_length_hist[_Py_UOP_HIST_SIZE];  /* *_hist: presumably filled by OPT_HIST() -- confirm */
+    uint64_t trace_run_length_hist[_Py_UOP_HIST_SIZE];
+    uint64_t optimized_trace_length_hist[_Py_UOP_HIST_SIZE];
+} OptimizationStats;
+
 typedef struct _stats {
     OpcodeStats opcode_stats[256];
     CallStats call_stats;
     ObjectStats object_stats;
+    OptimizationStats optimization_stats;
     GCStats *gc_stats;
 } PyStats;
 
diff --git a/Include/cpython/pythread.h b/Include/cpython/pythread.h
index cd2aab72d52df3..03f710a9f7ef2e 100644
--- a/Include/cpython/pythread.h
+++ b/Include/cpython/pythread.h
@@ -2,6 +2,14 @@
 #  error "this header file must not be included directly"
 #endif
 
+// PY_TIMEOUT_MAX is the highest usable value (in microseconds) of PY_TIMEOUT_T
+// type, and depends on the system threading API.
+//
+// NOTE: this isn't the same value as `_thread.TIMEOUT_MAX`. The _thread module
+// exposes a higher-level API, with timeouts expressed in seconds and
+// floating-point numbers allowed.
+PyAPI_DATA(const long long) PY_TIMEOUT_MAX;
+
 #define PYTHREAD_INVALID_THREAD_ID ((unsigned long)-1)
 
 #ifdef HAVE_PTHREAD_H
diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h
index 23d0fa399d7e6f..d3ea3a898bb425 100644
--- a/Include/internal/pycore_ceval.h
+++ b/Include/internal/pycore_ceval.h
@@ -121,7 +121,6 @@ extern void _PyEval_FiniGIL(PyInterpreterState *interp);
 
 extern void _PyEval_AcquireLock(PyThreadState *tstate);
 extern void _PyEval_ReleaseLock(PyInterpreterState *, PyThreadState *);
-extern PyThreadState * _PyThreadState_SwapNoGIL(PyThreadState *);
 
 extern void _PyEval_DeactivateOpCache(void);
 
@@ -193,6 +192,39 @@ int _PyEval_UnpackIterable(PyThreadState *tstate, PyObject *v, int argcnt, int a
 void _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame);
 
 
+#define _PY_GIL_DROP_REQUEST_BIT 0
+#define _PY_SIGNALS_PENDING_BIT 1
+#define _PY_CALLS_TO_DO_BIT 2
+#define _PY_ASYNC_EXCEPTION_BIT 3
+#define _PY_GC_SCHEDULED_BIT 4
+
+/* Reserve a few bits for future use */
+#define _PY_EVAL_EVENTS_BITS 8
+#define _PY_EVAL_EVENTS_MASK ((1 << _PY_EVAL_EVENTS_BITS)-1)
+
+// Atomically set (set == 1) or clear (set == 0) bit number `bit` of the eval breaker.
+static inline void
+_Py_set_eval_breaker_bit(PyInterpreterState *interp, uint32_t bit, uint32_t set)
+{
+    assert(set == 0 || set == 1);
+    uintptr_t to_set = ((uintptr_t)set) << bit;  // widen first, like `mask`
+    uintptr_t mask = ((uintptr_t)1) << bit;
+    uintptr_t old = _Py_atomic_load_uintptr(&interp->ceval.eval_breaker);
+    while ((old & mask) != to_set) {  // nothing to do once the bit matches
+        uintptr_t desired = (old & ~mask) | to_set;  // not `new`: C++ keyword
+        if (_Py_atomic_compare_exchange_uintptr(&interp->ceval.eval_breaker, &old, desired)) {
+            return;
+        }  // else: retry with the value the failed exchange left in `old`
+    }
+}
+
+static inline bool
+_Py_eval_breaker_bit_is_set(PyInterpreterState *interp, int32_t bit)
+{
+    return ((_Py_atomic_load_uintptr_relaxed(&interp->ceval.eval_breaker) >> bit) & 1) != 0;
+}
+
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h
index d0af5b542233e0..47971fbf2b4bfe 100644
--- a/Include/internal/pycore_ceval_state.h
+++ b/Include/internal/pycore_ceval_state.h
@@ -17,11 +17,7 @@ struct _pending_calls {
     int busy;
     PyThread_type_lock lock;
     /* Request for running pending calls. */
-    _Py_atomic_int calls_to_do;
-    /* Request for looking at the `async_exc` field of the current
-       thread state.
-       Guarded by the GIL. */
-    int async_exc;
+    int32_t calls_to_do;
 #define NPENDINGCALLS 32
     struct _pending_call {
         _Py_pending_call_func func;
@@ -62,11 +58,6 @@ struct _ceval_runtime_state {
         int _not_used;
 #endif
     } perf;
-    /* Request for checking signals. It is shared by all interpreters (see
-       bpo-40513). Any thread of any interpreter can receive a signal, but only
-       the main thread of the main interpreter can handle signals: see
-       _Py_ThreadCanHandleSignals(). */
-    _Py_atomic_int signals_pending;
     /* Pending calls to be made only on the main thread. */
     struct _pending_calls pending_mainthread;
 };
@@ -87,14 +78,12 @@ struct _ceval_state {
      * the fast path in the eval loop.
      * It is by far the hottest field in this struct and
      * should be placed at the beginning. */
-    _Py_atomic_int eval_breaker;
-    /* Request for dropping the GIL */
-    _Py_atomic_int gil_drop_request;
+    uintptr_t eval_breaker;
+    /* Avoid false sharing */
+    int64_t padding[7];
     int recursion_limit;
     struct _gil_runtime_state *gil;
     int own_gil;
-    /* The GC is ready to be executed */
-    _Py_atomic_int gc_scheduled;
     struct _pending_calls pending;
 };
 
diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index a77fa11baf8413..d31d8363d771ca 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -282,6 +282,17 @@ extern int _PyStaticCode_Init(PyCodeObject *co);
 #define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) \
     do { if (_Py_stats && PyFunction_Check(callable)) _Py_stats->call_stats.eval_calls[name]++; } while (0)
 #define GC_STAT_ADD(gen, name, n) do { if (_Py_stats) _Py_stats->gc_stats[(gen)].name += (n); } while (0)
+#define OPT_STAT_INC(name) do { if (_Py_stats) _Py_stats->optimization_stats.name++; } while (0)
+#define UOP_EXE_INC(opname) do { if (_Py_stats) _Py_stats->optimization_stats.opcode[opname].execution_count++; } while (0)
+#define OPT_UNSUPPORTED_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.unsupported_opcode[opname]++; } while (0)
+#define OPT_HIST(length, name) \
+    do { /* NOTE: `length` is evaluated twice; pass a side-effect-free expression */ \
+        if (_Py_stats) { /* bucket from _Py_bit_length(length - 1), clamped to the last slot */ \
+            int bucket = _Py_bit_length((length) >= 1 ? (length) - 1 : 0); \
+            bucket = (bucket >= _Py_UOP_HIST_SIZE) ? _Py_UOP_HIST_SIZE - 1 : bucket; \
+            _Py_stats->optimization_stats.name[bucket]++; \
+        } \
+    } while (0)
 
 // Export for '_opcode' shared extension
 PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
@@ -296,6 +307,10 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
 #define EVAL_CALL_STAT_INC(name) ((void)0)
 #define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) ((void)0)
 #define GC_STAT_ADD(gen, name, n) ((void)0)
+#define OPT_STAT_INC(name) ((void)0)
+#define UOP_EXE_INC(opname) ((void)0)
+#define OPT_UNSUPPORTED_OPCODE(opname) ((void)0)
+#define OPT_HIST(length, name) ((void)0)
 #endif  // !Py_STATS
 
 // Utility functions for reading/writing 32/64-bit values in the inline caches.
diff --git a/Include/internal/pycore_condvar.h b/Include/internal/pycore_condvar.h
index 489e67d4ec4f9f..34c21aaad43197 100644
--- a/Include/internal/pycore_condvar.h
+++ b/Include/internal/pycore_condvar.h
@@ -5,18 +5,8 @@
 #  error "this header requires Py_BUILD_CORE define"
 #endif
 
-#ifndef MS_WINDOWS
-#  include <unistd.h>             // _POSIX_THREADS
-#endif
+#include "pycore_pythread.h"      // _POSIX_THREADS
 
-#ifndef _POSIX_THREADS
-/* This means pthreads are not implemented in libc headers, hence the macro
-   not present in unistd.h. But they still can be implemented as an external
-   library (e.g. gnu pth in pthread emulation) */
-# ifdef HAVE_PTHREAD_H
-#  include <pthread.h>            // _POSIX_THREADS
-# endif
-#endif
 
 #ifdef _POSIX_THREADS
 /*
diff --git a/Include/internal/pycore_importdl.h b/Include/internal/pycore_importdl.h
new file mode 100644
index 00000000000000..dee64241c763f3
--- /dev/null
+++ b/Include/internal/pycore_importdl.h
@@ -0,0 +1,57 @@
+#ifndef Py_INTERNAL_IMPORTDL_H
+#define Py_INTERNAL_IMPORTDL_H
+
+#include "patchlevel.h"           // PY_MAJOR_VERSION
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+#  error "this header requires Py_BUILD_CORE define"
+#endif
+
+
+extern const char *_PyImport_DynLoadFiletab[];
+
+extern PyObject *_PyImport_LoadDynamicModuleWithSpec(PyObject *spec, FILE *);
+
+typedef PyObject *(*PyModInitFunction)(void);
+
+/* Max length of module suffix searched for -- accommodates "module.slb" */
+#define MAXSUFFIXSIZE 12
+
+#ifdef MS_WINDOWS
+#include <windows.h>
+typedef FARPROC dl_funcptr;
+
+#ifdef _DEBUG
+#  define PYD_DEBUG_SUFFIX "_d"
+#else
+#  define PYD_DEBUG_SUFFIX ""
+#endif
+
+#ifdef Py_NOGIL
+#  define PYD_THREADING_TAG "t"
+#else
+#  define PYD_THREADING_TAG ""
+#endif
+
+#ifdef PYD_PLATFORM_TAG
+#  define PYD_SOABI "cp" Py_STRINGIFY(PY_MAJOR_VERSION) Py_STRINGIFY(PY_MINOR_VERSION) PYD_THREADING_TAG "-" PYD_PLATFORM_TAG
+#else
+#  define PYD_SOABI "cp" Py_STRINGIFY(PY_MAJOR_VERSION) Py_STRINGIFY(PY_MINOR_VERSION) PYD_THREADING_TAG
+#endif
+
+#define PYD_TAGGED_SUFFIX PYD_DEBUG_SUFFIX "." PYD_SOABI ".pyd"
+#define PYD_UNTAGGED_SUFFIX PYD_DEBUG_SUFFIX ".pyd"
+
+#else
+typedef void (*dl_funcptr)(void);
+#endif
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_IMPORTDL_H */
diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h
index 0912bd175fe4f7..523dfdc21deda4 100644
--- a/Include/internal/pycore_interp.h
+++ b/Include/internal/pycore_interp.h
@@ -39,6 +39,32 @@ struct _Py_long_state {
     int max_str_digits;
 };
 
+
+/* cross-interpreter data registry */
+
+/* For now we use a global registry of shareable classes.  An
+   alternative would be to add a tp_* slot for a class's
+   crossinterpdatafunc. It would be simpler and more efficient. */
+
+struct _xidregitem;
+
+struct _xidregitem {
+    struct _xidregitem *prev;
+    struct _xidregitem *next;
+    /* This can be a dangling pointer, but only if weakref is set. */
+    PyTypeObject *cls;
+    /* This is NULL for builtin types. */
+    PyObject *weakref;
+    size_t refcount;
+    crossinterpdatafunc getdata;
+};
+
+struct _xidregistry {
+    PyThread_type_lock mutex;
+    struct _xidregitem *head;
+};
+
+
 /* interpreter state */
 
 /* PyInterpreterState holds the global state for one of the runtime's
@@ -67,12 +93,13 @@ struct _is {
     int _initialized;
     int finalizing;
 
-    uint64_t monitoring_version;
-    uint64_t last_restart_version;
+    uintptr_t last_restart_version;
     struct pythreads {
         uint64_t next_unique_id;
         /* The linked list of threads, newest first. */
         PyThreadState *head;
+        /* The thread currently executing in the __main__ module, if any. */
+        PyThreadState *main;
         /* Used in Modules/_threadmodule.c. */
         long count;
         /* Support for runtime thread stack size tuning.
@@ -148,6 +175,9 @@ struct _is {
     Py_ssize_t co_extra_user_count;
     freefunc co_extra_freefuncs[MAX_CO_EXTRA_USERS];
 
+    // XXX Remove this field once we have a tp_* slot.
+    struct _xidregistry xidregistry;
+
 #ifdef HAVE_FORK
     PyObject *before_forkers;
     PyObject *after_forkers_parent;
@@ -237,21 +267,6 @@ _PyInterpreterState_SetFinalizing(PyInterpreterState *interp, PyThreadState *tst
 }
 
 
-/* cross-interpreter data registry */
-
-/* For now we use a global registry of shareable classes.  An
-   alternative would be to add a tp_* slot for a class's
-   crossinterpdatafunc. It would be simpler and more efficient. */
-
-struct _xidregitem;
-
-struct _xidregitem {
-    struct _xidregitem *prev;
-    struct _xidregitem *next;
-    PyObject *cls;  // weakref to a PyTypeObject
-    crossinterpdatafunc getdata;
-};
-
 extern PyInterpreterState* _PyInterpreterState_LookUpID(int64_t);
 
 extern int _PyInterpreterState_IDInitref(PyInterpreterState *);
diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h
index 16c1637e496033..8ef398c5db09f6 100644
--- a/Include/internal/pycore_opcode_metadata.h
+++ b/Include/internal/pycore_opcode_metadata.h
@@ -46,40 +46,49 @@
 #define _GUARD_TYPE_VERSION 318
 #define _CHECK_MANAGED_OBJECT_HAS_VALUES 319
 #define _LOAD_ATTR_INSTANCE_VALUE 320
-#define _LOAD_ATTR_SLOT 321
-#define _GUARD_DORV_VALUES 322
-#define _STORE_ATTR_INSTANCE_VALUE 323
-#define _GUARD_TYPE_VERSION_STORE 324
-#define _STORE_ATTR_SLOT 325
-#define _IS_NONE 326
-#define _ITER_CHECK_LIST 327
-#define _ITER_JUMP_LIST 328
-#define _IS_ITER_EXHAUSTED_LIST 329
-#define _ITER_NEXT_LIST 330
-#define _ITER_CHECK_TUPLE 331
-#define _ITER_JUMP_TUPLE 332
-#define _IS_ITER_EXHAUSTED_TUPLE 333
-#define _ITER_NEXT_TUPLE 334
-#define _ITER_CHECK_RANGE 335
-#define _ITER_JUMP_RANGE 336
-#define _IS_ITER_EXHAUSTED_RANGE 337
-#define _ITER_NEXT_RANGE 338
-#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 339
-#define _GUARD_KEYS_VERSION 340
-#define _LOAD_ATTR_METHOD_WITH_VALUES 341
-#define _LOAD_ATTR_METHOD_NO_DICT 342
-#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 343
-#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 344
-#define _CHECK_PEP_523 345
-#define _CHECK_FUNCTION_EXACT_ARGS 346
-#define _CHECK_STACK_SPACE 347
-#define _INIT_CALL_PY_EXACT_ARGS 348
-#define _PUSH_FRAME 349
-#define _POP_JUMP_IF_FALSE 350
-#define _POP_JUMP_IF_TRUE 351
-#define _JUMP_TO_TOP 352
-#define _SAVE_CURRENT_IP 353
-#define _INSERT 354
+#define _CHECK_ATTR_MODULE 321
+#define _LOAD_ATTR_MODULE 322
+#define _CHECK_ATTR_WITH_HINT 323
+#define _LOAD_ATTR_WITH_HINT 324
+#define _LOAD_ATTR_SLOT 325
+#define _CHECK_ATTR_CLASS 326
+#define _LOAD_ATTR_CLASS 327
+#define _GUARD_DORV_VALUES 328
+#define _STORE_ATTR_INSTANCE_VALUE 329
+#define _STORE_ATTR_SLOT 330
+#define _IS_NONE 331
+#define _ITER_CHECK_LIST 332
+#define _ITER_JUMP_LIST 333
+#define _IS_ITER_EXHAUSTED_LIST 334
+#define _ITER_NEXT_LIST 335
+#define _ITER_CHECK_TUPLE 336
+#define _ITER_JUMP_TUPLE 337
+#define _IS_ITER_EXHAUSTED_TUPLE 338
+#define _ITER_NEXT_TUPLE 339
+#define _ITER_CHECK_RANGE 340
+#define _ITER_JUMP_RANGE 341
+#define _IS_ITER_EXHAUSTED_RANGE 342
+#define _ITER_NEXT_RANGE 343
+#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 344
+#define _GUARD_KEYS_VERSION 345
+#define _LOAD_ATTR_METHOD_WITH_VALUES 346
+#define _LOAD_ATTR_METHOD_NO_DICT 347
+#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 348
+#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 349
+#define _CHECK_ATTR_METHOD_LAZY_DICT 350
+#define _LOAD_ATTR_METHOD_LAZY_DICT 351
+#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 352
+#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 353
+#define _CHECK_PEP_523 354
+#define _CHECK_FUNCTION_EXACT_ARGS 355
+#define _CHECK_STACK_SPACE 356
+#define _INIT_CALL_PY_EXACT_ARGS 357
+#define _PUSH_FRAME 358
+#define _POP_JUMP_IF_FALSE 359
+#define _POP_JUMP_IF_TRUE 360
+#define _JUMP_TO_TOP 361
+#define _SAVE_CURRENT_IP 362
+#define _INSERT 363
 
 extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump);
 #ifdef NEED_OPCODE_METADATA
@@ -361,14 +370,26 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump)  {
             return 1;
         case LOAD_ATTR_INSTANCE_VALUE:
             return 1;
+        case _CHECK_ATTR_MODULE:
+            return 1;
+        case _LOAD_ATTR_MODULE:
+            return 1;
         case LOAD_ATTR_MODULE:
             return 1;
+        case _CHECK_ATTR_WITH_HINT:
+            return 1;
+        case _LOAD_ATTR_WITH_HINT:
+            return 1;
         case LOAD_ATTR_WITH_HINT:
             return 1;
         case _LOAD_ATTR_SLOT:
             return 1;
         case LOAD_ATTR_SLOT:
             return 1;
+        case _CHECK_ATTR_CLASS:
+            return 1;
+        case _LOAD_ATTR_CLASS:
+            return 1;
         case LOAD_ATTR_CLASS:
             return 1;
         case LOAD_ATTR_PROPERTY:
@@ -383,8 +404,6 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump)  {
             return 2;
         case STORE_ATTR_WITH_HINT:
             return 2;
-        case _GUARD_TYPE_VERSION_STORE:
-            return 1;
         case _STORE_ATTR_SLOT:
             return 2;
         case STORE_ATTR_SLOT:
@@ -509,10 +528,18 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump)  {
             return 1;
         case LOAD_ATTR_METHOD_NO_DICT:
             return 1;
+        case _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES:
+            return 1;
         case LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES:
             return 1;
+        case _LOAD_ATTR_NONDESCRIPTOR_NO_DICT:
+            return 1;
         case LOAD_ATTR_NONDESCRIPTOR_NO_DICT:
             return 1;
+        case _CHECK_ATTR_METHOD_LAZY_DICT:
+            return 1;
+        case _LOAD_ATTR_METHOD_LAZY_DICT:
+            return 1;
         case LOAD_ATTR_METHOD_LAZY_DICT:
             return 1;
         case INSTRUMENTED_CALL:
@@ -919,16 +946,28 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump)  {
             return ((oparg & 1) ? 1 : 0) + 1;
         case LOAD_ATTR_INSTANCE_VALUE:
             return (oparg & 1 ? 1 : 0) + 1;
+        case _CHECK_ATTR_MODULE:
+            return 1;
+        case _LOAD_ATTR_MODULE:
+            return ((oparg & 1) ? 1 : 0) + 1;
         case LOAD_ATTR_MODULE:
+            return (oparg & 1 ? 1 : 0) + 1;
+        case _CHECK_ATTR_WITH_HINT:
+            return 1;
+        case _LOAD_ATTR_WITH_HINT:
             return ((oparg & 1) ? 1 : 0) + 1;
         case LOAD_ATTR_WITH_HINT:
-            return ((oparg & 1) ? 1 : 0) + 1;
+            return (oparg & 1 ? 1 : 0) + 1;
         case _LOAD_ATTR_SLOT:
             return ((oparg & 1) ? 1 : 0) + 1;
         case LOAD_ATTR_SLOT:
             return (oparg & 1 ? 1 : 0) + 1;
-        case LOAD_ATTR_CLASS:
+        case _CHECK_ATTR_CLASS:
+            return 1;
+        case _LOAD_ATTR_CLASS:
             return ((oparg & 1) ? 1 : 0) + 1;
+        case LOAD_ATTR_CLASS:
+            return (oparg & 1 ? 1 : 0) + 1;
         case LOAD_ATTR_PROPERTY:
             return 1;
         case LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN:
@@ -941,8 +980,6 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump)  {
             return 0;
         case STORE_ATTR_WITH_HINT:
             return 0;
-        case _GUARD_TYPE_VERSION_STORE:
-            return 1;
         case _STORE_ATTR_SLOT:
             return 0;
         case STORE_ATTR_SLOT:
@@ -1067,10 +1104,18 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump)  {
             return 2;
         case LOAD_ATTR_METHOD_NO_DICT:
             return 2;
+        case _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES:
+            return 1;
         case LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES:
             return 1;
+        case _LOAD_ATTR_NONDESCRIPTOR_NO_DICT:
+            return 1;
         case LOAD_ATTR_NONDESCRIPTOR_NO_DICT:
             return 1;
+        case _CHECK_ATTR_METHOD_LAZY_DICT:
+            return 1;
+        case _LOAD_ATTR_METHOD_LAZY_DICT:
+            return 2;
         case LOAD_ATTR_METHOD_LAZY_DICT:
             return 2;
         case INSTRUMENTED_CALL:
@@ -1402,10 +1447,16 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = {
     [_CHECK_MANAGED_OBJECT_HAS_VALUES] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG },
     [_LOAD_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
     [LOAD_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
+    [_CHECK_ATTR_MODULE] = { true, INSTR_FMT_IXC0, HAS_DEOPT_FLAG },
+    [_LOAD_ATTR_MODULE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
     [LOAD_ATTR_MODULE] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
+    [_CHECK_ATTR_WITH_HINT] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG },
+    [_LOAD_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG },
     [LOAD_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG },
     [_LOAD_ATTR_SLOT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
     [LOAD_ATTR_SLOT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
+    [_CHECK_ATTR_CLASS] = { true, INSTR_FMT_IXC0, HAS_DEOPT_FLAG },
+    [_LOAD_ATTR_CLASS] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG },
     [LOAD_ATTR_CLASS] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
     [LOAD_ATTR_PROPERTY] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
     [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG },
@@ -1413,7 +1464,6 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = {
     [_STORE_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IXC, 0 },
     [STORE_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IXC000, HAS_DEOPT_FLAG },
     [STORE_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG },
-    [_GUARD_TYPE_VERSION_STORE] = { true, INSTR_FMT_IXC0, HAS_DEOPT_FLAG },
     [_STORE_ATTR_SLOT] = { true, INSTR_FMT_IXC, 0 },
     [STORE_ATTR_SLOT] = { true, INSTR_FMT_IXC000, HAS_DEOPT_FLAG },
     [COMPARE_OP] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG },
@@ -1476,8 +1526,12 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = {
     [LOAD_ATTR_METHOD_WITH_VALUES] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
     [_LOAD_ATTR_METHOD_NO_DICT] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG },
     [LOAD_ATTR_METHOD_NO_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
+    [_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG },
     [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
+    [_LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG },
     [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
+    [_CHECK_ATTR_METHOD_LAZY_DICT] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG },
+    [_LOAD_ATTR_METHOD_LAZY_DICT] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG },
     [LOAD_ATTR_METHOD_LAZY_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
     [INSTRUMENTED_CALL] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG },
     [CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG },
@@ -1637,9 +1691,12 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN
     [LOAD_SUPER_ATTR_METHOD] = { .nuops = 1, .uops = { { LOAD_SUPER_ATTR_METHOD, 0, 0 } } },
     [LOAD_ATTR] = { .nuops = 1, .uops = { { LOAD_ATTR, 0, 0 } } },
     [LOAD_ATTR_INSTANCE_VALUE] = { .nuops = 3, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _CHECK_MANAGED_OBJECT_HAS_VALUES, 0, 0 }, { _LOAD_ATTR_INSTANCE_VALUE, 1, 3 } } },
+    [LOAD_ATTR_MODULE] = { .nuops = 2, .uops = { { _CHECK_ATTR_MODULE, 2, 1 }, { _LOAD_ATTR_MODULE, 1, 3 } } },
+    [LOAD_ATTR_WITH_HINT] = { .nuops = 3, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _CHECK_ATTR_WITH_HINT, 0, 0 }, { _LOAD_ATTR_WITH_HINT, 1, 3 } } },
     [LOAD_ATTR_SLOT] = { .nuops = 2, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _LOAD_ATTR_SLOT, 1, 3 } } },
-    [STORE_ATTR_INSTANCE_VALUE] = { .nuops = 3, .uops = { { _GUARD_TYPE_VERSION_STORE, 2, 1 }, { _GUARD_DORV_VALUES, 0, 0 }, { _STORE_ATTR_INSTANCE_VALUE, 1, 3 } } },
-    [STORE_ATTR_SLOT] = { .nuops = 2, .uops = { { _GUARD_TYPE_VERSION_STORE, 2, 1 }, { _STORE_ATTR_SLOT, 1, 3 } } },
+    [LOAD_ATTR_CLASS] = { .nuops = 2, .uops = { { _CHECK_ATTR_CLASS, 2, 1 }, { _LOAD_ATTR_CLASS, 4, 5 } } },
+    [STORE_ATTR_INSTANCE_VALUE] = { .nuops = 3, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _GUARD_DORV_VALUES, 0, 0 }, { _STORE_ATTR_INSTANCE_VALUE, 1, 3 } } },
+    [STORE_ATTR_SLOT] = { .nuops = 2, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _STORE_ATTR_SLOT, 1, 3 } } },
     [COMPARE_OP] = { .nuops = 1, .uops = { { COMPARE_OP, 0, 0 } } },
     [COMPARE_OP_FLOAT] = { .nuops = 1, .uops = { { COMPARE_OP_FLOAT, 0, 0 } } },
     [COMPARE_OP_INT] = { .nuops = 1, .uops = { { COMPARE_OP_INT, 0, 0 } } },
@@ -1659,6 +1716,9 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN
     [PUSH_EXC_INFO] = { .nuops = 1, .uops = { { PUSH_EXC_INFO, 0, 0 } } },
     [LOAD_ATTR_METHOD_WITH_VALUES] = { .nuops = 4, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT, 0, 0 }, { _GUARD_KEYS_VERSION, 2, 3 }, { _LOAD_ATTR_METHOD_WITH_VALUES, 4, 5 } } },
     [LOAD_ATTR_METHOD_NO_DICT] = { .nuops = 2, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _LOAD_ATTR_METHOD_NO_DICT, 4, 5 } } },
+    [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = { .nuops = 4, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT, 0, 0 }, { _GUARD_KEYS_VERSION, 2, 3 }, { _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES, 4, 5 } } },
+    [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = { .nuops = 2, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _LOAD_ATTR_NONDESCRIPTOR_NO_DICT, 4, 5 } } },
+    [LOAD_ATTR_METHOD_LAZY_DICT] = { .nuops = 3, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _CHECK_ATTR_METHOD_LAZY_DICT, 0, 0 }, { _LOAD_ATTR_METHOD_LAZY_DICT, 4, 5 } } },
     [CALL_BOUND_METHOD_EXACT_ARGS] = { .nuops = 9, .uops = { { _CHECK_PEP_523, 0, 0 }, { _CHECK_CALL_BOUND_METHOD_EXACT_ARGS, 0, 0 }, { _INIT_CALL_BOUND_METHOD_EXACT_ARGS, 0, 0 }, { _CHECK_FUNCTION_EXACT_ARGS, 2, 1 }, { _CHECK_STACK_SPACE, 0, 0 }, { _INIT_CALL_PY_EXACT_ARGS, 0, 0 }, { _SET_IP, 7, 3 }, { _SAVE_CURRENT_IP, 0, 0 }, { _PUSH_FRAME, 0, 0 } } },
     [CALL_PY_EXACT_ARGS] = { .nuops = 7, .uops = { { _CHECK_PEP_523, 0, 0 }, { _CHECK_FUNCTION_EXACT_ARGS, 2, 1 }, { _CHECK_STACK_SPACE, 0, 0 }, { _INIT_CALL_PY_EXACT_ARGS, 0, 0 }, { _SET_IP, 7, 3 }, { _SAVE_CURRENT_IP, 0, 0 }, { _PUSH_FRAME, 0, 0 } } },
     [CALL_TYPE_1] = { .nuops = 1, .uops = { { CALL_TYPE_1, 0, 0 } } },
@@ -1711,10 +1771,15 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = {
     [_GUARD_TYPE_VERSION] = "_GUARD_TYPE_VERSION",
     [_CHECK_MANAGED_OBJECT_HAS_VALUES] = "_CHECK_MANAGED_OBJECT_HAS_VALUES",
     [_LOAD_ATTR_INSTANCE_VALUE] = "_LOAD_ATTR_INSTANCE_VALUE",
+    [_CHECK_ATTR_MODULE] = "_CHECK_ATTR_MODULE",
+    [_LOAD_ATTR_MODULE] = "_LOAD_ATTR_MODULE",
+    [_CHECK_ATTR_WITH_HINT] = "_CHECK_ATTR_WITH_HINT",
+    [_LOAD_ATTR_WITH_HINT] = "_LOAD_ATTR_WITH_HINT",
     [_LOAD_ATTR_SLOT] = "_LOAD_ATTR_SLOT",
+    [_CHECK_ATTR_CLASS] = "_CHECK_ATTR_CLASS",
+    [_LOAD_ATTR_CLASS] = "_LOAD_ATTR_CLASS",
     [_GUARD_DORV_VALUES] = "_GUARD_DORV_VALUES",
     [_STORE_ATTR_INSTANCE_VALUE] = "_STORE_ATTR_INSTANCE_VALUE",
-    [_GUARD_TYPE_VERSION_STORE] = "_GUARD_TYPE_VERSION_STORE",
     [_STORE_ATTR_SLOT] = "_STORE_ATTR_SLOT",
     [_IS_NONE] = "_IS_NONE",
     [_ITER_CHECK_LIST] = "_ITER_CHECK_LIST",
@@ -1733,6 +1798,10 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = {
     [_GUARD_KEYS_VERSION] = "_GUARD_KEYS_VERSION",
     [_LOAD_ATTR_METHOD_WITH_VALUES] = "_LOAD_ATTR_METHOD_WITH_VALUES",
     [_LOAD_ATTR_METHOD_NO_DICT] = "_LOAD_ATTR_METHOD_NO_DICT",
+    [_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = "_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES",
+    [_LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = "_LOAD_ATTR_NONDESCRIPTOR_NO_DICT",
+    [_CHECK_ATTR_METHOD_LAZY_DICT] = "_CHECK_ATTR_METHOD_LAZY_DICT",
+    [_LOAD_ATTR_METHOD_LAZY_DICT] = "_LOAD_ATTR_METHOD_LAZY_DICT",
     [_CHECK_CALL_BOUND_METHOD_EXACT_ARGS] = "_CHECK_CALL_BOUND_METHOD_EXACT_ARGS",
     [_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = "_INIT_CALL_BOUND_METHOD_EXACT_ARGS",
     [_CHECK_PEP_523] = "_CHECK_PEP_523",
diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h
index 2e568f8aeeb152..7135b1e966feb5 100644
--- a/Include/internal/pycore_pystate.h
+++ b/Include/internal/pycore_pystate.h
@@ -11,6 +11,33 @@ extern "C" {
 #include "pycore_runtime.h"       // _PyRuntime
 
 
+// Values for PyThreadState.state. A thread must be in the "attached" state
+// before calling most Python APIs. If the GIL is enabled, then "attached"
+// implies that the thread holds the GIL and "detached" implies that the
+// thread does not hold the GIL (or is in the process of releasing it). In
+// `--disable-gil` builds, multiple threads may be "attached" to the same
+// interpreter at the same time. Only the "bound" thread may perform the
+// transitions between "attached" and "detached" on its own PyThreadState.
+//
+// The "gc" state is used to implement stop-the-world pauses, such as for
+// cyclic garbage collection. It is only used in `--disable-gil` builds. It is
+// similar to the "detached" state, but only the thread performing a
+// stop-the-world pause may transition threads between the "detached" and "gc"
+// states. A thread trying to "attach" from the "gc" state will block until
+// it is transitioned back to "detached" when the stop-the-world pause is
+// complete.
+//
+// State transition diagram:
+//
+//            (bound thread)        (stop-the-world thread)
+// [attached]       <->       [detached]       <->       [gc]
+//
+// See `_PyThreadState_Attach()` and `_PyThreadState_Detach()`.
+#define _Py_THREAD_DETACHED     0
+#define _Py_THREAD_ATTACHED     1
+#define _Py_THREAD_GC           2
+
+
 /* Check if the current thread is the main thread.
    Use _Py_IsMainInterpreter() to check if it's the main interpreter. */
 static inline int
@@ -44,6 +71,12 @@ _Py_IsMainInterpreterFinalizing(PyInterpreterState *interp)
             interp == &_PyRuntime._main_interpreter);
 }
 
+// Export for _xxsubinterpreters module.
+PyAPI_FUNC(int) _PyInterpreterState_SetRunningMain(PyInterpreterState *);
+PyAPI_FUNC(void) _PyInterpreterState_SetNotRunningMain(PyInterpreterState *);
+PyAPI_FUNC(int) _PyInterpreterState_IsRunningMain(PyInterpreterState *);
+PyAPI_FUNC(int) _PyInterpreterState_FailIfRunningMain(PyInterpreterState *);
+
 
 static inline const PyConfig *
 _Py_GetMainConfig(void)
@@ -87,7 +120,7 @@ PyAPI_FUNC(PyThreadState *) _PyThreadState_GetCurrent(void);
 
    The caller must hold the GIL.
 
-   See also PyThreadState_Get() and _PyThreadState_UncheckedGet(). */
+   See also PyThreadState_Get() and PyThreadState_GetUnchecked(). */
 static inline PyThreadState*
 _PyThreadState_GET(void)
 {
@@ -98,6 +131,21 @@ _PyThreadState_GET(void)
 #endif
 }
 
+// Attaches the current thread to the interpreter.
+//
+// This may block while acquiring the GIL (if the GIL is enabled) or while
+// waiting for a stop-the-world pause (if the GIL is disabled).
+//
+// High-level code should generally call PyEval_RestoreThread() instead, which
+// calls this function.
+void _PyThreadState_Attach(PyThreadState *tstate);
+
+// Detaches the current thread from the interpreter.
+//
+// High-level code should generally call PyEval_SaveThread() instead, which
+// calls this function.
+void _PyThreadState_Detach(PyThreadState *tstate);
+
 
 static inline void
 _Py_EnsureFuncTstateNotNULL(const char *func, PyThreadState *tstate)
@@ -134,7 +182,9 @@ static inline PyInterpreterState* _PyInterpreterState_GET(void) {
 
 // PyThreadState functions
 
-extern PyThreadState * _PyThreadState_New(PyInterpreterState *interp);
+extern PyThreadState * _PyThreadState_New(
+    PyInterpreterState *interp,
+    int whence);
 extern void _PyThreadState_Bind(PyThreadState *tstate);
 extern void _PyThreadState_DeleteExcept(PyThreadState *tstate);
 
diff --git a/Include/internal/pycore_pythread.h b/Include/internal/pycore_pythread.h
index 5ec2abda91e86b..f679c1bdb75499 100644
--- a/Include/internal/pycore_pythread.h
+++ b/Include/internal/pycore_pythread.h
@@ -8,30 +8,29 @@ extern "C" {
 #  error "this header requires Py_BUILD_CORE define"
 #endif
 
+// Get _POSIX_THREADS and _POSIX_SEMAPHORES macros if available
+#if (defined(HAVE_UNISTD_H) && !defined(_POSIX_THREADS) \
+                            && !defined(_POSIX_SEMAPHORES))
+#  include <unistd.h>             // _POSIX_THREADS, _POSIX_SEMAPHORES
+#endif
+#if (defined(HAVE_PTHREAD_H) && !defined(_POSIX_THREADS) \
+                             && !defined(_POSIX_SEMAPHORES))
+   // This means pthreads are not implemented in libc headers, hence the macros
+   // are not present in <unistd.h>. But they can still be implemented as an
+   // external library (e.g. GNU Pth in pthread emulation).
+#  include <pthread.h>            // _POSIX_THREADS, _POSIX_SEMAPHORES
+#endif
+#if !defined(_POSIX_THREADS) && defined(__hpux) && defined(_SC_THREADS)
+   // Check if we're running on HP-UX and _SC_THREADS is defined. If so, then
+   // enough of the POSIX threads package is implemented to support Python
+   // threads.
+   //
+   // This is valid for HP-UX 11.23 running on an ia64 system. If needed, add
+   // a check of __ia64 to verify that we're running on an ia64 system instead
+   // of a pa-risc system.
+#  define _POSIX_THREADS
+#endif
 
-#ifndef _POSIX_THREADS
-/* This means pthreads are not implemented in libc headers, hence the macro
-   not present in unistd.h. But they still can be implemented as an external
-   library (e.g. gnu pth in pthread emulation) */
-#  ifdef HAVE_PTHREAD_H
-#    include <pthread.h>            // _POSIX_THREADS
-#  endif
-# ifndef _POSIX_THREADS
-/* Check if we're running on HP-UX and _SC_THREADS is defined. If so, then
-   enough of the Posix threads package is implemented to support python
-   threads.
-
-   This is valid for HP-UX 11.23 running on an ia64 system. If needed, add
-   a check of __ia64 to verify that we're running on an ia64 system instead
-   of a pa-risc system.
-*/
-#  ifdef __hpux
-#   ifdef _SC_THREADS
-#    define _POSIX_THREADS
-#   endif
-#  endif
-# endif /* _POSIX_THREADS */
-#endif /* _POSIX_THREADS */
 
 #if defined(_POSIX_THREADS) || defined(HAVE_PTHREAD_STUBS)
 #  define _USE_PTHREADS
@@ -44,6 +43,8 @@ extern "C" {
 
 
 #if defined(HAVE_PTHREAD_STUBS)
+#include <stdbool.h>              // bool
+
 // pthread_key
 struct py_stub_tls_entry {
     bool in_use;
diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h
index cc3a3420befa3d..1dc243e46e8ef8 100644
--- a/Include/internal/pycore_runtime.h
+++ b/Include/internal/pycore_runtime.h
@@ -201,10 +201,7 @@ typedef struct pyruntimestate {
      tools. */
 
     // XXX Remove this field once we have a tp_* slot.
-    struct _xidregistry {
-        PyThread_type_lock mutex;
-        struct _xidregitem *head;
-    } xidregistry;
+    struct _xidregistry xidregistry;
 
     struct _pymem_allocators allocators;
     struct _obmalloc_global_state obmalloc;
diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h
index 2deba02a89f33c..574a3c1a9db66c 100644
--- a/Include/internal/pycore_runtime_init.h
+++ b/Include/internal/pycore_runtime_init.h
@@ -185,6 +185,7 @@ extern PyTypeObject _PyExc_MemoryError;
 
 #define _PyThreadState_INIT \
     { \
+        ._whence = _PyThreadState_WHENCE_NOTSET, \
         .py_recursion_limit = Py_DEFAULT_RECURSION_LIMIT, \
         .context_ver = 1, \
     }
diff --git a/Include/internal/pycore_semaphore.h b/Include/internal/pycore_semaphore.h
index 2a4ecb7147acee..4c37df7b39a48a 100644
--- a/Include/internal/pycore_semaphore.h
+++ b/Include/internal/pycore_semaphore.h
@@ -7,7 +7,8 @@
 #  error "this header requires Py_BUILD_CORE define"
 #endif
 
-#include "pycore_time.h"            // _PyTime_t
+#include "pycore_pythread.h"      // _POSIX_SEMAPHORES
+#include "pycore_time.h"          // _PyTime_t
 
 #ifdef MS_WINDOWS
 #   define WIN32_LEAN_AND_MEAN
@@ -26,6 +27,7 @@
 #   include <semaphore.h>
 #endif
 
+
 #ifdef __cplusplus
 extern "C" {
 #endif
diff --git a/Include/pystate.h b/Include/pystate.h
index e6b4de979c87b8..727b8fbfffe0e6 100644
--- a/Include/pystate.h
+++ b/Include/pystate.h
@@ -56,7 +56,7 @@ PyAPI_FUNC(void) PyThreadState_Delete(PyThreadState *);
 
    The caller must hold the GIL.
 
-   See also _PyThreadState_UncheckedGet() and _PyThreadState_GET(). */
+   See also PyThreadState_GetUnchecked() and _PyThreadState_GET(). */
 PyAPI_FUNC(PyThreadState *) PyThreadState_Get(void);
 
 // Alias to PyThreadState_Get()
diff --git a/Include/pythread.h b/Include/pythread.h
index 63714437c496b7..0784f6b2e5391f 100644
--- a/Include/pythread.h
+++ b/Include/pythread.h
@@ -33,42 +33,18 @@ PyAPI_FUNC(int) PyThread_acquire_lock(PyThread_type_lock, int);
 #define WAIT_LOCK       1
 #define NOWAIT_LOCK     0
 
-/* PY_TIMEOUT_T is the integral type used to specify timeouts when waiting
-   on a lock (see PyThread_acquire_lock_timed() below).
-   PY_TIMEOUT_MAX is the highest usable value (in microseconds) of that
-   type, and depends on the system threading API.
-
-   NOTE: this isn't the same value as `_thread.TIMEOUT_MAX`.  The _thread
-   module exposes a higher-level API, with timeouts expressed in seconds
-   and floating-point numbers allowed.
-*/
+// PY_TIMEOUT_T is the integral type used to specify timeouts when waiting
+// on a lock (see PyThread_acquire_lock_timed() below).
 #define PY_TIMEOUT_T long long
 
-#if defined(_POSIX_THREADS)
-   /* PyThread_acquire_lock_timed() uses _PyTime_FromNanoseconds(us * 1000),
-      convert microseconds to nanoseconds. */
-#  define PY_TIMEOUT_MAX (LLONG_MAX / 1000)
-#elif defined (NT_THREADS)
-   // WaitForSingleObject() accepts timeout in milliseconds in the range
-   // [0; 0xFFFFFFFE] (DWORD type). INFINITE value (0xFFFFFFFF) means no
-   // timeout. 0xFFFFFFFE milliseconds is around 49.7 days.
-#  if 0xFFFFFFFELL * 1000 < LLONG_MAX
-#    define PY_TIMEOUT_MAX (0xFFFFFFFELL * 1000)
-#  else
-#    define PY_TIMEOUT_MAX LLONG_MAX
-#  endif
-#else
-#  define PY_TIMEOUT_MAX LLONG_MAX
-#endif
-
 
 /* If microseconds == 0, the call is non-blocking: it returns immediately
    even when the lock can't be acquired.
    If microseconds > 0, the call waits up to the specified duration.
    If microseconds < 0, the call waits until success (or abnormal failure)
 
-   microseconds must be less than PY_TIMEOUT_MAX. Behaviour otherwise is
-   undefined.
+   If *microseconds* is greater than PY_TIMEOUT_MAX, clamp the timeout to
+   PY_TIMEOUT_MAX microseconds.
 
    If intr_flag is true and the acquire is interrupted by a signal, then the
    call will return PY_LOCK_INTR.  The caller may reattempt to acquire the
diff --git a/Lib/argparse.py b/Lib/argparse.py
index dfc98695f64e0a..a32884db80d1ea 100644
--- a/Lib/argparse.py
+++ b/Lib/argparse.py
@@ -89,8 +89,6 @@
 import re as _re
 import sys as _sys
 
-import warnings
-
 from gettext import gettext as _, ngettext
 
 SUPPRESS = '==SUPPRESS=='
@@ -910,6 +908,7 @@ def __init__(self,
         #   parser.add_argument('-f', action=BooleanOptionalAction, type=int)
         for field_name in ('type', 'choices', 'metavar'):
             if locals()[field_name] is not _deprecated_default:
+                import warnings
                 warnings._deprecated(
                     field_name,
                     "{name!r} is deprecated as of Python 3.12 and will be "
@@ -1700,6 +1699,7 @@ def _remove_action(self, action):
         self._group_actions.remove(action)
 
     def add_argument_group(self, *args, **kwargs):
+        import warnings
         warnings.warn(
             "Nesting argument groups is deprecated.",
             category=DeprecationWarning,
@@ -1728,6 +1728,7 @@ def _remove_action(self, action):
         self._group_actions.remove(action)
 
     def add_mutually_exclusive_group(self, *args, **kwargs):
+        import warnings
         warnings.warn(
             "Nesting mutually exclusive groups is deprecated.",
             category=DeprecationWarning,
diff --git a/Lib/ast.py b/Lib/ast.py
index 1f54309c8450d8..f7888d18859ae4 100644
--- a/Lib/ast.py
+++ b/Lib/ast.py
@@ -1270,13 +1270,15 @@ def visit_JoinedStr(self, node):
         quote_type = quote_types[0]
         self.write(f"{quote_type}{value}{quote_type}")
 
-    def _write_fstring_inner(self, node):
+    def _write_fstring_inner(self, node, scape_newlines=False):
         if isinstance(node, JoinedStr):
             # for both the f-string itself, and format_spec
             for value in node.values:
-                self._write_fstring_inner(value)
+                self._write_fstring_inner(value, scape_newlines=scape_newlines)
         elif isinstance(node, Constant) and isinstance(node.value, str):
             value = node.value.replace("{", "{{").replace("}", "}}")
+            if scape_newlines:
+                value = value.replace("\n", "\\n")
             self.write(value)
         elif isinstance(node, FormattedValue):
             self.visit_FormattedValue(node)
@@ -1299,7 +1301,10 @@ def unparse_inner(inner):
                 self.write(f"!{chr(node.conversion)}")
             if node.format_spec:
                 self.write(":")
-                self._write_fstring_inner(node.format_spec)
+                self._write_fstring_inner(
+                    node.format_spec,
+                    scape_newlines=True
+                )
 
     def visit_Name(self, node):
         self.write(node.id)
diff --git a/Lib/calendar.py b/Lib/calendar.py
index 2a4deb70a0111f..03469d8ac96bcd 100644
--- a/Lib/calendar.py
+++ b/Lib/calendar.py
@@ -10,7 +10,6 @@
 from enum import IntEnum, global_enum
 import locale as _locale
 from itertools import repeat
-import warnings
 
 __all__ = ["IllegalMonthError", "IllegalWeekdayError", "setfirstweekday",
            "firstweekday", "isleap", "leapdays", "weekday", "monthrange",
@@ -44,6 +43,7 @@ def __str__(self):
 
 def __getattr__(name):
     if name in ('January', 'February'):
+        import warnings
         warnings.warn(f"The '{name}' attribute is deprecated, use '{name.upper()}' instead",
                       DeprecationWarning, stacklevel=2)
         if name == 'January':
diff --git a/Lib/concurrent/futures/process.py b/Lib/concurrent/futures/process.py
index 3990e6b1833d78..ffaffdb8b3d0aa 100644
--- a/Lib/concurrent/futures/process.py
+++ b/Lib/concurrent/futures/process.py
@@ -666,7 +666,7 @@ def __init__(self, max_workers=None, mp_context=None,
         _check_system_limits()
 
         if max_workers is None:
-            self._max_workers = os.cpu_count() or 1
+            self._max_workers = os.process_cpu_count() or 1
             if sys.platform == 'win32':
                 self._max_workers = min(_MAX_WINDOWS_WORKERS,
                                         self._max_workers)
diff --git a/Lib/concurrent/futures/thread.py b/Lib/concurrent/futures/thread.py
index 3b3a36a5093336..a024033f35fb54 100644
--- a/Lib/concurrent/futures/thread.py
+++ b/Lib/concurrent/futures/thread.py
@@ -139,10 +139,10 @@ def __init__(self, max_workers=None, thread_name_prefix='',
             # * CPU bound task which releases GIL
             # * I/O bound task (which releases GIL, of course)
             #
-            # We use cpu_count + 4 for both types of tasks.
+            # We use process_cpu_count + 4 for both types of tasks.
             # But we limit it to 32 to avoid consuming surprisingly large resource
             # on many core machine.
-            max_workers = min(32, (os.cpu_count() or 1) + 4)
+            max_workers = min(32, (os.process_cpu_count() or 1) + 4)
         if max_workers <= 0:
             raise ValueError("max_workers must be greater than 0")
 
diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py
index 84f8d68ce092a4..31dc6f8abce91a 100644
--- a/Lib/dataclasses.py
+++ b/Lib/dataclasses.py
@@ -1567,15 +1567,15 @@ def _replace(obj, /, **changes):
         if not f.init:
             # Error if this field is specified in changes.
             if f.name in changes:
-                raise ValueError(f'field {f.name} is declared with '
-                                 'init=False, it cannot be specified with '
-                                 'replace()')
+                raise TypeError(f'field {f.name} is declared with '
+                                f'init=False, it cannot be specified with '
+                                f'replace()')
             continue
 
         if f.name not in changes:
             if f._field_type is _FIELD_INITVAR and f.default is MISSING:
-                raise ValueError(f"InitVar {f.name!r} "
-                                 'must be specified with replace()')
+                raise TypeError(f"InitVar {f.name!r} "
+                                f'must be specified with replace()')
             changes[f.name] = getattr(obj, f.name)
 
     # Create the new object, which calls __init__() and
diff --git a/Lib/dis.py b/Lib/dis.py
index 7e4792e8a8dc62..633c01b6fce56a 100644
--- a/Lib/dis.py
+++ b/Lib/dis.py
@@ -901,12 +901,14 @@ def _test():
     import argparse
 
     parser = argparse.ArgumentParser()
+    parser.add_argument('-C', '--show-caches', action='store_true',
+                        help='show inline caches')
     parser.add_argument('infile', type=argparse.FileType('rb'), nargs='?', default='-')
     args = parser.parse_args()
     with args.infile as infile:
         source = infile.read()
     code = compile(source, args.infile.name, "exec")
-    dis(code)
+    dis(code, show_caches=args.show_caches)
 
 if __name__ == "__main__":
     _test()
diff --git a/Lib/getpass.py b/Lib/getpass.py
index 6970d8adfbab36..8b42c0a536b4c4 100644
--- a/Lib/getpass.py
+++ b/Lib/getpass.py
@@ -18,7 +18,6 @@
 import io
 import os
 import sys
-import warnings
 
 __all__ = ["getpass","getuser","GetPassWarning"]
 
@@ -118,6 +117,7 @@ def win_getpass(prompt='Password: ', stream=None):
 
 
 def fallback_getpass(prompt='Password: ', stream=None):
+    import warnings
     warnings.warn("Can not control echo on the terminal.", GetPassWarning,
                   stacklevel=2)
     if not stream:
diff --git a/Lib/multiprocessing/pool.py b/Lib/multiprocessing/pool.py
index 4f5d88cb975cb7..f979890170b1a1 100644
--- a/Lib/multiprocessing/pool.py
+++ b/Lib/multiprocessing/pool.py
@@ -200,7 +200,7 @@ def __init__(self, processes=None, initializer=None, initargs=(),
         self._initargs = initargs
 
         if processes is None:
-            processes = os.cpu_count() or 1
+            processes = os.process_cpu_count() or 1
         if processes < 1:
             raise ValueError("Number of processes must be at least 1")
         if maxtasksperchild is not None:
diff --git a/Lib/ntpath.py b/Lib/ntpath.py
index df3402d46c9cc6..3061a4a5ef4c56 100644
--- a/Lib/ntpath.py
+++ b/Lib/ntpath.py
@@ -23,7 +23,6 @@
 import genericpath
 from genericpath import *
 
-
 __all__ = ["normcase","isabs","join","splitdrive","splitroot","split","splitext",
            "basename","dirname","commonprefix","getsize","getmtime",
            "getatime","getctime", "islink","exists","lexists","isdir","isfile",
@@ -601,7 +600,7 @@ def abspath(path):
             return _abspath_fallback(path)
 
 try:
-    from nt import _getfinalpathname, readlink as _nt_readlink
+    from nt import _findfirstfile, _getfinalpathname, readlink as _nt_readlink
 except ImportError:
     # realpath is a no-op on systems without _getfinalpathname support.
     realpath = abspath
@@ -688,10 +687,15 @@ def _getfinalpathname_nonstrict(path):
                 except OSError:
                     # If we fail to readlink(), let's keep traversing
                     pass
-                path, name = split(path)
-                # TODO (bpo-38186): Request the real file name from the directory
-                # entry using FindFirstFileW. For now, we will return the path
-                # as best we have it
+                # If we get these errors, try to get the real name of the file without accessing it.
+                if ex.winerror in (1, 5, 32, 50, 87, 1920, 1921):
+                    try:
+                        name = _findfirstfile(path)
+                        path, _ = split(path)
+                    except OSError:
+                        path, name = split(path)
+                else:
+                    path, name = split(path)
                 if path and not name:
                     return path + tail
                 tail = join(name, tail) if tail else name
diff --git a/Lib/os.py b/Lib/os.py
index d8c9ba4b15400a..35842cedf14fc7 100644
--- a/Lib/os.py
+++ b/Lib/os.py
@@ -1136,3 +1136,17 @@ def add_dll_directory(path):
             cookie,
             nt._remove_dll_directory
         )
+
+
+if _exists('sched_getaffinity'):
+    def process_cpu_count():
+        """
+        Get the number of CPUs of the current process.
+
+        Return the number of logical CPUs usable by the calling thread of the
+        current process. Return None if indeterminable.
+        """
+        return len(sched_getaffinity(0))
+else:
+    # Just an alias to cpu_count() (same docstring)
+    process_cpu_count = cpu_count
diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index e6be9061013a8a..9e6d0754eccf3e 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -18,7 +18,6 @@
 from _collections_abc import Sequence
 from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL
 from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
-from urllib.parse import quote_from_bytes as urlquote_from_bytes
 
 try:
     import pwd
@@ -452,7 +451,8 @@ def as_uri(self):
             # It's a posix path => 'file:///etc/hosts'
             prefix = 'file://'
             path = str(self)
-        return prefix + urlquote_from_bytes(os.fsencode(path))
+        from urllib.parse import quote_from_bytes
+        return prefix + quote_from_bytes(os.fsencode(path))
 
     @property
     def _str_normcase(self):
@@ -814,9 +814,10 @@ class _PathBase(PurePath):
     __bytes__ = None
     __fspath__ = None  # virtual paths have no local file system representation
 
-    def _unsupported(self, method_name):
-        msg = f"{type(self).__name__}.{method_name}() is unsupported"
-        if isinstance(self, Path):
+    @classmethod
+    def _unsupported(cls, method_name):
+        msg = f"{cls.__name__}.{method_name}() is unsupported"
+        if issubclass(cls, Path):
             msg += " on this system"
         raise UnsupportedOperation(msg)
 
@@ -1418,6 +1419,11 @@ def group(self):
         """
         self._unsupported("group")
 
+    @classmethod
+    def from_uri(cls, uri):
+        """Return a new path from the given 'file' URI."""
+        cls._unsupported("from_uri")
+
     def as_uri(self):
         """Return the path as a URI."""
         self._unsupported("as_uri")
@@ -1661,6 +1667,30 @@ def expanduser(self):
 
         return self
 
+    @classmethod
+    def from_uri(cls, uri):
+        """Return a new path from the given 'file' URI."""
+        if not uri.startswith('file:'):
+            raise ValueError(f"URI does not start with 'file:': {uri!r}")
+        path = uri[5:]
+        if path[:3] == '///':
+            # Remove empty authority
+            path = path[2:]
+        elif path[:12] == '//localhost/':
+            # Remove 'localhost' authority
+            path = path[11:]
+        if path[:3] == '///' or (path[:1] == '/' and path[2:3] in (':', '|')):
+            # Remove slash before DOS device/UNC path (NB '' in ':|' is True)
+            path = path[1:]
+        if path[1:2] == '|':
+            # Replace bar with colon in DOS drive
+            path = path[:1] + ':' + path[2:]
+        from urllib.parse import unquote_to_bytes
+        path = cls(os.fsdecode(unquote_to_bytes(path)))
+        if not path.is_absolute():
+            raise ValueError(f"URI is not absolute: {uri!r}")
+        return path
+
 
 class PosixPath(Path, PurePosixPath):
     """Path subclass for non-Windows systems.
diff --git a/Lib/random.py b/Lib/random.py
index 1d789b107904fb..1cfc2ba2f025b5 100644
--- a/Lib/random.py
+++ b/Lib/random.py
@@ -65,7 +65,7 @@
 
 try:
     # hashlib is pretty heavy to load, try lean internal module first
-    from _sha512 import sha512 as _sha512
+    from _sha2 import sha512 as _sha512
 except ImportError:
     # fallback to official implementation
     from hashlib import sha512 as _sha512
diff --git a/Lib/shutil.py b/Lib/shutil.py
index b37bd082eee0c6..0fed0117a63234 100644
--- a/Lib/shutil.py
+++ b/Lib/shutil.py
@@ -10,7 +10,6 @@
 import fnmatch
 import collections
 import errno
-import warnings
 
 try:
     import zlib
@@ -481,7 +480,7 @@ def _copytree(entries, src, dst, symlinks, ignore, copy_function,
     if ignore is not None:
         ignored_names = ignore(os.fspath(src), [x.name for x in entries])
     else:
-        ignored_names = set()
+        ignored_names = ()
 
     os.makedirs(dst, exist_ok=dirs_exist_ok)
     errors = []
@@ -723,6 +722,7 @@ def rmtree(path, ignore_errors=False, onerror=None, *, onexc=None, dir_fd=None):
     """
 
     if onerror is not None:
+        import warnings
         warnings.warn("onerror argument is deprecated, use onexc instead",
                       DeprecationWarning, stacklevel=2)
 
@@ -1554,8 +1554,16 @@ def which(cmd, mode=os.F_OK | os.X_OK, path=None):
         if use_bytes:
             pathext = [os.fsencode(ext) for ext in pathext]
 
-        # Always try checking the originally given cmd, if it doesn't match, try pathext
-        files = [cmd] + [cmd + ext for ext in pathext]
+        files = ([cmd] + [cmd + ext for ext in pathext])
+
+        # gh-109590. If we are looking for an executable, we need to look
+        # for a PATHEXT match. The first entry in files is the direct match
+        # (e.g. python.exe instead of python).
+        # Check that direct match first if and only if its extension is in
+        # PATHEXT; otherwise check it last.
+        suffix = os.path.splitext(files[0])[1].upper()
+        if mode & os.X_OK and not any(suffix == ext.upper() for ext in pathext):
+            files.append(files.pop(0))
     else:
         # On other platforms you don't have things like PATHEXT to tell you
         # what file suffixes are executable, so just pass on cmd as-is.
diff --git a/Lib/sqlite3/__main__.py b/Lib/sqlite3/__main__.py
index 3b59763375c147..b93b84384a0925 100644
--- a/Lib/sqlite3/__main__.py
+++ b/Lib/sqlite3/__main__.py
@@ -116,6 +116,10 @@ def main(*args):
         else:
             # No SQL provided; start the REPL.
             console = SqliteInteractiveConsole(con)
+            try:
+                import readline
+            except ImportError:
+                pass
             console.interact(banner, exitmsg="")
     finally:
         con.close()
diff --git a/Lib/statistics.py b/Lib/statistics.py
index 96c803483057e7..4da06889c6db46 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -844,7 +844,9 @@ def quantiles(data, *, n=4, method='exclusive'):
     data = sorted(data)
     ld = len(data)
     if ld < 2:
-        raise StatisticsError('must have at least two data points')
+        if ld == 1:
+            return data * (n - 1)
+        raise StatisticsError('must have at least one data point')
     if method == 'inclusive':
         m = ld - 1
         result = []
diff --git a/Lib/sysconfig.py b/Lib/sysconfig.py
index a8b5c5f7dfba5b..edfe451a86bfd0 100644
--- a/Lib/sysconfig.py
+++ b/Lib/sysconfig.py
@@ -544,16 +544,20 @@ def _init_posix(vars):
 def _init_non_posix(vars):
     """Initialize the module as appropriate for NT"""
     # set basic install directories
-    import _imp
+    import _winapi
+    import _sysconfig
     vars['LIBDEST'] = get_path('stdlib')
     vars['BINLIBDEST'] = get_path('platstdlib')
     vars['INCLUDEPY'] = get_path('include')
-    try:
-        # GH-99201: _imp.extension_suffixes may be empty when
-        # HAVE_DYNAMIC_LOADING is not set. In this case, don't set EXT_SUFFIX.
-        vars['EXT_SUFFIX'] = _imp.extension_suffixes()[0]
-    except IndexError:
-        pass
+
+    # Add EXT_SUFFIX, SOABI, and Py_NOGIL
+    vars.update(_sysconfig.config_vars())
+
+    vars['LIBDIR'] = _safe_realpath(os.path.join(get_config_var('installed_base'), 'libs'))
+    if hasattr(sys, 'dllhandle'):
+        dllhandle = _winapi.GetModuleFileName(sys.dllhandle)
+        vars['LIBRARY'] = os.path.basename(_safe_realpath(dllhandle))
+        vars['LDLIBRARY'] = vars['LIBRARY']
     vars['EXE'] = '.exe'
     vars['VERSION'] = _PY_VERSION_SHORT_NO_DOT
     vars['BINDIR'] = os.path.dirname(_safe_realpath(sys.executable))
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 726f9f50ba2e72..ec32f9ba49b03f 100755
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -46,7 +46,6 @@
 import struct
 import copy
 import re
-import warnings
 
 try:
     import pwd
@@ -2219,6 +2218,7 @@ def _get_filter_function(self, filter):
         if filter is None:
             filter = self.extraction_filter
             if filter is None:
+                import warnings
                 warnings.warn(
                     'Python 3.14 will, by default, filter extracted tar '
                     + 'archives and reject files or modify their metadata. '
diff --git a/Lib/test/.ruff.toml b/Lib/test/.ruff.toml
index e202766b147e6d..f4e68eba14068d 100644
--- a/Lib/test/.ruff.toml
+++ b/Lib/test/.ruff.toml
@@ -8,9 +8,6 @@ extend-exclude = [
     # Failed to lint
     "encoded_modules/module_iso_8859_1.py",
     "encoded_modules/module_koi8_r.py",
-    # Failed to parse
-    "support/socket_helper.py",
-    "test_fstring.py",
     # TODO Fix: F811 Redefinition of unused name
     "test__opcode.py",
     "test_buffer.py",
@@ -26,8 +23,6 @@ extend-exclude = [
     "test_keywordonlyarg.py",
     "test_pkg.py",
     "test_subclassinit.py",
-    "test_typing.py",
-    "test_unittest/testmock/testpatch.py",
     "test_yield_from.py",
     "time_hashlib.py",
     # Pending https://github.com/python/cpython/pull/109139
diff --git a/Lib/test/libregrtest/cmdline.py b/Lib/test/libregrtest/cmdline.py
index 8562a48446b4a7..dd4cd335bef7e3 100644
--- a/Lib/test/libregrtest/cmdline.py
+++ b/Lib/test/libregrtest/cmdline.py
@@ -3,6 +3,7 @@
 import shlex
 import sys
 from test.support import os_helper
+from .utils import ALL_RESOURCES, RESOURCE_NAMES
 
 
 USAGE = """\
@@ -27,8 +28,10 @@
 Additional option details:
 
 -r randomizes test execution order. You can use --randseed=int to provide an
-int seed value for the randomizer; this is useful for reproducing troublesome
-test orders.
+int seed value for the randomizer. The randseed value will be used
+to set seeds for all random usages in tests
+(including randomizing the tests order if -r is set).
+By default we always set random seed, but do not randomize test order.
 
 -s On the first invocation of regrtest using -s, the first test file found
 or the first test file given on the command line is run, and the name of
@@ -130,19 +133,6 @@
 """
 
 
-ALL_RESOURCES = ('audio', 'curses', 'largefile', 'network',
-                 'decimal', 'cpu', 'subprocess', 'urlfetch', 'gui', 'walltime')
-
-# Other resources excluded from --use=all:
-#
-# - extralagefile (ex: test_zipfile64): really too slow to be enabled
-#   "by default"
-# - tzdata: while needed to validate fully test_datetime, it makes
-#   test_datetime too slow (15-20 min on some buildbots) and so is disabled by
-#   default (see bpo-30822).
-RESOURCE_NAMES = ALL_RESOURCES + ('extralargefile', 'tzdata')
-
-
 class Namespace(argparse.Namespace):
     def __init__(self, **kwargs) -> None:
         self.ci = False
@@ -229,6 +219,9 @@ def _create_parser():
                             more_details)
     group.add_argument('-p', '--python', metavar='PYTHON',
                        help='Command to run Python test subprocesses with.')
+    group.add_argument('--randseed', metavar='SEED',
+                       dest='random_seed', type=int,
+                       help='pass a global random seed')
 
     group = parser.add_argument_group('Verbosity')
     group.add_argument('-v', '--verbose', action='count',
@@ -249,10 +242,6 @@ def _create_parser():
     group = parser.add_argument_group('Selecting tests')
     group.add_argument('-r', '--randomize', action='store_true',
                        help='randomize test execution order.' + more_details)
-    group.add_argument('--randseed', metavar='SEED',
-                       dest='random_seed', type=int,
-                       help='pass a random seed to reproduce a previous '
-                            'random run')
     group.add_argument('-f', '--fromfile', metavar='FILE',
                        help='read names of tests to run from a file.' +
                             more_details)
diff --git a/Lib/test/libregrtest/main.py b/Lib/test/libregrtest/main.py
index dcb2c5870de176..60179ec7708c1c 100644
--- a/Lib/test/libregrtest/main.py
+++ b/Lib/test/libregrtest/main.py
@@ -3,6 +3,7 @@
 import re
 import shlex
 import sys
+import sysconfig
 import time
 
 from test import support
@@ -22,6 +23,7 @@
     strip_py_suffix, count, format_duration,
     printlist, get_temp_dir, get_work_dir, exit_timeout,
     display_header, cleanup_temp_dir, print_warning,
+    is_cross_compiled, get_host_runner,
     MS_WINDOWS, EXIT_TIMEOUT)
 
 
@@ -71,10 +73,9 @@ def __init__(self, ns: Namespace, _add_python_opts: bool = False):
         self.want_rerun: bool = ns.rerun
         self.want_run_leaks: bool = ns.runleaks
 
-        ci_mode = (ns.fast_ci or ns.slow_ci)
+        self.ci_mode: bool = (ns.fast_ci or ns.slow_ci)
         self.want_add_python_opts: bool = (_add_python_opts
-                                           and ns._add_python_opts
-                                           and ci_mode)
+                                           and ns._add_python_opts)
 
         # Select tests
         if ns.match_tests:
@@ -105,8 +106,6 @@ def __init__(self, ns: Namespace, _add_python_opts: bool = False):
         self.fail_env_changed: bool = ns.fail_env_changed
         self.fail_rerun: bool = ns.fail_rerun
         self.forever: bool = ns.forever
-        self.randomize: bool = ns.randomize
-        self.random_seed: int | None = ns.random_seed
         self.output_on_failure: bool = ns.verbose3
         self.timeout: float | None = ns.timeout
         if ns.huntrleaks:
@@ -128,6 +127,17 @@ def __init__(self, ns: Namespace, _add_python_opts: bool = False):
         self.coverage_dir: StrPath | None = ns.coverdir
         self.tmp_dir: StrPath | None = ns.tempdir
 
+        # Randomize
+        self.randomize: bool = ns.randomize
+        self.random_seed: int | None =  (
+            ns.random_seed
+            if ns.random_seed is not None
+            else random.getrandbits(32)
+        )
+        if 'SOURCE_DATE_EPOCH' in os.environ:
+            self.randomize = False
+            self.random_seed = None
+
         # tests
         self.first_runtests: RunTests | None = None
 
@@ -208,10 +218,8 @@ def find_tests(self, tests: TestList | None = None) -> tuple[TestTuple, TestList
                 print(f"Cannot find starting test: {self.starting_test}")
                 sys.exit(1)
 
+        random.seed(self.random_seed)
         if self.randomize:
-            if self.random_seed is None:
-                self.random_seed = random.randrange(100_000_000)
-            random.seed(self.random_seed)
             random.shuffle(selected)
 
         return (tuple(selected), tests)
@@ -425,16 +433,15 @@ def _run_tests(self, selected: TestTuple, tests: TestList | None) -> int:
         if self.num_workers < 0:
             # Use all CPUs + 2 extra worker processes for tests
             # that like to sleep
-            self.num_workers = (os.cpu_count() or 1) + 2
+            self.num_workers = (os.process_cpu_count() or 1) + 2
 
         # For a partial run, we do not need to clutter the output.
         if (self.want_header
             or not(self.pgo or self.quiet or self.single_test_run
                    or tests or self.cmdline_args)):
-            display_header(self.use_resources)
+            display_header(self.use_resources, self.python_cmd)
 
-        if self.randomize:
-            print("Using random seed", self.random_seed)
+        print("Using random seed", self.random_seed)
 
         runtests = self.create_run_tests(selected)
         self.first_runtests = runtests
@@ -489,8 +496,56 @@ def run_tests(self, selected: TestTuple, tests: TestList | None) -> int:
                 # processes.
                 return self._run_tests(selected, tests)
 
-    def _add_python_opts(self):
-        python_opts = []
+    def _add_cross_compile_opts(self, regrtest_opts):
+        # WASM/WASI buildbot builders pass multiple PYTHON environment
+        # variables such as PYTHONPATH and _PYTHON_HOSTRUNNER.
+        keep_environ = bool(self.python_cmd)
+        environ = None
+
+        # Are we using cross-compilation?
+        cross_compile = is_cross_compiled()
+
+        # Get HOSTRUNNER
+        hostrunner = get_host_runner()
+
+        if cross_compile:
+            # emulate -E, but keep PYTHONPATH + cross compile env vars,
+            # so test executable can load correct sysconfigdata file.
+            keep = {
+                '_PYTHON_PROJECT_BASE',
+                '_PYTHON_HOST_PLATFORM',
+                '_PYTHON_SYSCONFIGDATA_NAME',
+                'PYTHONPATH'
+            }
+            old_environ = os.environ
+            new_environ = {
+                name: value for name, value in os.environ.items()
+                if not name.startswith(('PYTHON', '_PYTHON')) or name in keep
+            }
+            # Only set environ if at least one variable was removed
+            if new_environ != old_environ:
+                environ = new_environ
+            keep_environ = True
+
+        if cross_compile and hostrunner:
+            if self.num_workers == 0:
+                # For now use only two cores for cross-compiled builds;
+                # hostrunner can be expensive.
+                regrtest_opts.extend(['-j', '2'])
+
+            # If HOSTRUNNER is set and -p/--python option is not given, then
+            # use hostrunner to execute python binary for tests.
+            if not self.python_cmd:
+                buildpython = sysconfig.get_config_var("BUILDPYTHON")
+                python_cmd = f"{hostrunner} {buildpython}"
+                regrtest_opts.extend(["--python", python_cmd])
+                keep_environ = True
+
+        return (environ, keep_environ)
+
+    def _add_ci_python_opts(self, python_opts, keep_environ):
+        # --fast-ci and --slow-ci add options to Python:
+        # "-u -W default -bb -E"
 
         # Unbuffered stdout and stderr
         if not sys.stdout.write_through:
@@ -504,32 +559,27 @@ def _add_python_opts(self):
         if sys.flags.bytes_warning < 2:
             python_opts.append('-bb')
 
-        # WASM/WASI buildbot builders pass multiple PYTHON environment
-        # variables such as PYTHONPATH and _PYTHON_HOSTRUNNER.
-        if not self.python_cmd:
+        if not keep_environ:
             # Ignore PYTHON* environment variables
             if not sys.flags.ignore_environment:
                 python_opts.append('-E')
 
-        if not python_opts:
-            return
-
-        cmd = [*sys.orig_argv, "--dont-add-python-opts"]
-        cmd[1:1] = python_opts
-
+    def _execute_python(self, cmd, environ):
         # Make sure that messages before execv() are logged
         sys.stdout.flush()
         sys.stderr.flush()
 
         cmd_text = shlex.join(cmd)
         try:
+            print(f"+ {cmd_text}", flush=True)
+
             if hasattr(os, 'execv') and not MS_WINDOWS:
                 os.execv(cmd[0], cmd)
                 # On success, execv() do no return.
                 # On error, it raises an OSError.
             else:
                 import subprocess
-                with subprocess.Popen(cmd) as proc:
+                with subprocess.Popen(cmd, env=environ) as proc:
                     try:
                         proc.wait()
                     except KeyboardInterrupt:
@@ -548,6 +598,28 @@ def _add_python_opts(self):
                           f"Command: {cmd_text}")
             # continue executing main()
 
+    def _add_python_opts(self):
+        python_opts = []
+        regrtest_opts = []
+
+        environ, keep_environ = self._add_cross_compile_opts(regrtest_opts)
+        if self.ci_mode:
+            self._add_ci_python_opts(python_opts, keep_environ)
+
+        if (not python_opts) and (not regrtest_opts) and (environ is None):
+            # Nothing changed: nothing to do
+            return
+
+        # Create new command line
+        cmd = list(sys.orig_argv)
+        if python_opts:
+            cmd[1:1] = python_opts
+        if regrtest_opts:
+            cmd.extend(regrtest_opts)
+        cmd.append("--dont-add-python-opts")
+
+        self._execute_python(cmd, environ)
+
     def _init(self):
         # Set sys.stdout encoder error handler to backslashreplace,
         # similar to sys.stderr error handler, to avoid UnicodeEncodeError
diff --git a/Lib/test/libregrtest/pgo.py b/Lib/test/libregrtest/pgo.py
index cabbba73d5eff5..e3a6927be5db1d 100644
--- a/Lib/test/libregrtest/pgo.py
+++ b/Lib/test/libregrtest/pgo.py
@@ -42,10 +42,10 @@
     'test_set',
     'test_sqlite3',
     'test_statistics',
+    'test_str',
     'test_struct',
     'test_tabnanny',
     'test_time',
-    'test_unicode',
     'test_xml_etree',
     'test_xml_etree_c',
 ]
diff --git a/Lib/test/libregrtest/result.py b/Lib/test/libregrtest/result.py
index bf885264657d5c..d6b0d5ad383a5b 100644
--- a/Lib/test/libregrtest/result.py
+++ b/Lib/test/libregrtest/result.py
@@ -19,7 +19,8 @@ class State:
     ENV_CHANGED = "ENV_CHANGED"
     RESOURCE_DENIED = "RESOURCE_DENIED"
     INTERRUPTED = "INTERRUPTED"
-    MULTIPROCESSING_ERROR = "MULTIPROCESSING_ERROR"
+    WORKER_FAILED = "WORKER_FAILED"   # non-zero worker process exit code
+    WORKER_BUG = "WORKER_BUG"         # exception when running a worker
     DID_NOT_RUN = "DID_NOT_RUN"
     TIMEOUT = "TIMEOUT"
 
@@ -29,7 +30,8 @@ def is_failed(state):
             State.FAILED,
             State.UNCAUGHT_EXC,
             State.REFLEAK,
-            State.MULTIPROCESSING_ERROR,
+            State.WORKER_FAILED,
+            State.WORKER_BUG,
             State.TIMEOUT}
 
     @staticmethod
@@ -42,14 +44,16 @@ def has_meaningful_duration(state):
             State.SKIPPED,
             State.RESOURCE_DENIED,
             State.INTERRUPTED,
-            State.MULTIPROCESSING_ERROR,
+            State.WORKER_FAILED,
+            State.WORKER_BUG,
             State.DID_NOT_RUN}
 
     @staticmethod
     def must_stop(state):
         return state in {
             State.INTERRUPTED,
-            State.MULTIPROCESSING_ERROR}
+            State.WORKER_BUG,
+        }
 
 
 @dataclasses.dataclass(slots=True)
@@ -108,8 +112,10 @@ def __str__(self) -> str:
                 return f"{self.test_name} skipped (resource denied)"
             case State.INTERRUPTED:
                 return f"{self.test_name} interrupted"
-            case State.MULTIPROCESSING_ERROR:
-                return f"{self.test_name} process crashed"
+            case State.WORKER_FAILED:
+                return f"{self.test_name} worker non-zero exit code"
+            case State.WORKER_BUG:
+                return f"{self.test_name} worker bug"
             case State.DID_NOT_RUN:
                 return f"{self.test_name} ran no tests"
             case State.TIMEOUT:
diff --git a/Lib/test/libregrtest/results.py b/Lib/test/libregrtest/results.py
index 35df50d581ff6a..3708078ff0bf3a 100644
--- a/Lib/test/libregrtest/results.py
+++ b/Lib/test/libregrtest/results.py
@@ -30,6 +30,7 @@ def __init__(self):
         self.rerun_results: list[TestResult] = []
 
         self.interrupted: bool = False
+        self.worker_bug: bool = False
         self.test_times: list[tuple[float, TestName]] = []
         self.stats = TestStats()
         # used by --junit-xml
@@ -38,7 +39,8 @@ def __init__(self):
     def is_all_good(self):
         return (not self.bad
                 and not self.skipped
-                and not self.interrupted)
+                and not self.interrupted
+                and not self.worker_bug)
 
     def get_executed(self):
         return (set(self.good) | set(self.bad) | set(self.skipped)
@@ -60,6 +62,8 @@ def get_state(self, fail_env_changed):
 
         if self.interrupted:
             state.append("INTERRUPTED")
+        if self.worker_bug:
+            state.append("WORKER BUG")
         if not state:
             state.append("SUCCESS")
 
@@ -77,6 +81,8 @@ def get_exitcode(self, fail_env_changed, fail_rerun):
             exitcode = EXITCODE_NO_TESTS_RAN
         elif fail_rerun and self.rerun:
             exitcode = EXITCODE_RERUN_FAIL
+        elif self.worker_bug:
+            exitcode = EXITCODE_BAD_TEST
         return exitcode
 
     def accumulate_result(self, result: TestResult, runtests: RunTests):
@@ -105,6 +111,9 @@ def accumulate_result(self, result: TestResult, runtests: RunTests):
                 else:
                     raise ValueError(f"invalid test state: {result.state!r}")
 
+        if result.state == State.WORKER_BUG:
+            self.worker_bug = True
+
         if result.has_meaningful_duration() and not rerun:
             self.test_times.append((result.duration, test_name))
         if result.stats is not None:
@@ -173,12 +182,6 @@ def write_junit(self, filename: StrPath):
                 f.write(s)
 
     def display_result(self, tests: TestTuple, quiet: bool, print_slowest: bool):
-        omitted = set(tests) - self.get_executed()
-        if omitted:
-            print()
-            print(count(len(omitted), "test"), "omitted:")
-            printlist(omitted)
-
         if print_slowest:
             self.test_times.sort(reverse=True)
             print()
@@ -186,16 +189,21 @@ def display_result(self, tests: TestTuple, quiet: bool, print_slowest: bool):
             for test_time, test in self.test_times[:10]:
                 print("- %s: %s" % (test, format_duration(test_time)))
 
-        all_tests = [
-            (self.bad, "test", "{} failed:"),
-            (self.env_changed, "test", "{} altered the execution environment (env changed):"),
-        ]
+        all_tests = []
+        omitted = set(tests) - self.get_executed()
+
+        # less important
+        all_tests.append((omitted, "test", "{} omitted:"))
         if not quiet:
             all_tests.append((self.skipped, "test", "{} skipped:"))
             all_tests.append((self.resource_denied, "test", "{} skipped (resource denied):"))
-        all_tests.append((self.rerun, "re-run test", "{}:"))
         all_tests.append((self.run_no_tests, "test", "{} run no tests:"))
 
+        # more important
+        all_tests.append((self.env_changed, "test", "{} altered the execution environment (env changed):"))
+        all_tests.append((self.rerun, "re-run test", "{}:"))
+        all_tests.append((self.bad, "test", "{} failed:"))
+
         for tests_list, count_text, title_format in all_tests:
             if tests_list:
                 print()
diff --git a/Lib/test/libregrtest/run_workers.py b/Lib/test/libregrtest/run_workers.py
index 41ed7b0bac01ad..106f9730832e54 100644
--- a/Lib/test/libregrtest/run_workers.py
+++ b/Lib/test/libregrtest/run_workers.py
@@ -22,7 +22,7 @@
 from .single import PROGRESS_MIN_TIME
 from .utils import (
     StrPath, TestName, MS_WINDOWS,
-    format_duration, print_warning, count, plural)
+    format_duration, print_warning, count, plural, get_signal_name)
 from .worker import create_worker_process, USE_PROCESS_GROUP
 
 if MS_WINDOWS:
@@ -92,7 +92,7 @@ def __init__(self,
                  test_name: TestName,
                  err_msg: str | None,
                  stdout: str | None,
-                 state: str = State.MULTIPROCESSING_ERROR):
+                 state: str):
         result = TestResult(test_name, state=state)
         self.mp_result = MultiprocessResult(result, stdout, err_msg)
         super().__init__()
@@ -262,6 +262,9 @@ def create_worker_runtests(self, test_name: TestName, json_file: JsonFile) -> Ru
         kwargs = {}
         if match_tests:
             kwargs['match_tests'] = match_tests
+        if self.runtests.output_on_failure:
+            kwargs['verbose'] = True
+            kwargs['output_on_failure'] = False
         return self.runtests.copy(
             tests=tests,
             json_file=json_file,
@@ -298,7 +301,9 @@ def read_stdout(self, stdout_file: TextIO) -> str:
             # gh-101634: Catch UnicodeDecodeError if stdout cannot be
             # decoded from encoding
             raise WorkerError(self.test_name,
-                              f"Cannot read process stdout: {exc}", None)
+                              f"Cannot read process stdout: {exc}",
+                              stdout=None,
+                              state=State.WORKER_BUG)
 
     def read_json(self, json_file: JsonFile, json_tmpfile: TextIO | None,
                   stdout: str) -> tuple[TestResult, str]:
@@ -317,10 +322,11 @@ def read_json(self, json_file: JsonFile, json_tmpfile: TextIO | None,
             # decoded from encoding
             err_msg = f"Failed to read worker process JSON: {exc}"
             raise WorkerError(self.test_name, err_msg, stdout,
-                              state=State.MULTIPROCESSING_ERROR)
+                              state=State.WORKER_BUG)
 
         if not worker_json:
-            raise WorkerError(self.test_name, "empty JSON", stdout)
+            raise WorkerError(self.test_name, "empty JSON", stdout,
+                              state=State.WORKER_BUG)
 
         try:
             result = TestResult.from_json(worker_json)
@@ -329,7 +335,7 @@ def read_json(self, json_file: JsonFile, json_tmpfile: TextIO | None,
             # decoded from encoding
             err_msg = f"Failed to parse worker process JSON: {exc}"
             raise WorkerError(self.test_name, err_msg, stdout,
-                              state=State.MULTIPROCESSING_ERROR)
+                              state=State.WORKER_BUG)
 
         return (result, stdout)
 
@@ -345,9 +351,15 @@ def _runtest(self, test_name: TestName) -> MultiprocessResult:
             stdout = self.read_stdout(stdout_file)
 
             if retcode is None:
-                raise WorkerError(self.test_name, None, stdout, state=State.TIMEOUT)
+                raise WorkerError(self.test_name, stdout=stdout,
+                                  err_msg=None,
+                                  state=State.TIMEOUT)
             if retcode != 0:
-                raise WorkerError(self.test_name, f"Exit code {retcode}", stdout)
+                name = get_signal_name(retcode)
+                if name:
+                    retcode = f"{retcode} ({name})"
+                raise WorkerError(self.test_name, f"Exit code {retcode}", stdout,
+                                  state=State.WORKER_FAILED)
 
             result, stdout = self.read_json(json_file, json_tmpfile, stdout)
 
@@ -527,7 +539,7 @@ def display_result(self, mp_result: MultiprocessResult) -> None:
 
         text = str(result)
         if mp_result.err_msg:
-            # MULTIPROCESSING_ERROR
+            # WORKER_FAILED or WORKER_BUG
             text += ' (%s)' % mp_result.err_msg
         elif (result.duration >= PROGRESS_MIN_TIME and not pgo):
             text += ' (%s)' % format_duration(result.duration)
@@ -543,7 +555,7 @@ def _process_result(self, item: QueueOutput) -> TestResult:
             # Thread got an exception
             format_exc = item[1]
             print_warning(f"regrtest worker thread failed: {format_exc}")
-            result = TestResult("<regrtest worker>", state=State.MULTIPROCESSING_ERROR)
+            result = TestResult("<regrtest worker>", state=State.WORKER_BUG)
             self.results.accumulate_result(result, self.runtests)
             return result
 
@@ -553,8 +565,16 @@ def _process_result(self, item: QueueOutput) -> TestResult:
         self.results.accumulate_result(result, self.runtests)
         self.display_result(mp_result)
 
-        if mp_result.worker_stdout:
-            print(mp_result.worker_stdout, flush=True)
+        # Display worker stdout
+        if not self.runtests.output_on_failure:
+            show_stdout = True
+        else:
+            # --verbose3 ignores stdout on success
+            show_stdout = (result.state != State.PASSED)
+        if show_stdout:
+            stdout = mp_result.worker_stdout
+            if stdout:
+                print(stdout, flush=True)
 
         return result
 
diff --git a/Lib/test/libregrtest/setup.py b/Lib/test/libregrtest/setup.py
index f0d8d7ebaa2fdb..cb410da5acb4c3 100644
--- a/Lib/test/libregrtest/setup.py
+++ b/Lib/test/libregrtest/setup.py
@@ -126,5 +126,4 @@ def setup_tests(runtests: RunTests):
     if runtests.gc_threshold is not None:
         gc.set_threshold(runtests.gc_threshold)
 
-    if runtests.randomize:
-        random.seed(runtests.random_seed)
+    random.seed(runtests.random_seed)
diff --git a/Lib/test/libregrtest/utils.py b/Lib/test/libregrtest/utils.py
index 46451152b8859f..ea2086cd71b173 100644
--- a/Lib/test/libregrtest/utils.py
+++ b/Lib/test/libregrtest/utils.py
@@ -5,6 +5,9 @@
 import os.path
 import platform
 import random
+import shlex
+import signal
+import subprocess
 import sys
 import sysconfig
 import tempfile
@@ -30,6 +33,19 @@
 EXIT_TIMEOUT = 120.0
 
 
+ALL_RESOURCES = ('audio', 'curses', 'largefile', 'network',
+                 'decimal', 'cpu', 'subprocess', 'urlfetch', 'gui', 'walltime')
+
+# Other resources excluded from --use=all:
+#
+# - extralargefile (ex: test_zipfile64): really too slow to be enabled
+#   "by default"
+# - tzdata: while needed to validate fully test_datetime, it makes
+#   test_datetime too slow (15-20 min on some buildbots) and so is disabled by
+#   default (see bpo-30822).
+RESOURCE_NAMES = ALL_RESOURCES + ('extralargefile', 'tzdata')
+
+
 # Types for types hints
 StrPath = str
 TestName = str
@@ -522,7 +538,42 @@ def adjust_rlimit_nofile():
                           f"{new_fd_limit}: {err}.")
 
 
-def display_header(use_resources: tuple[str, ...]):
+def get_host_runner():
+    if (hostrunner := os.environ.get("_PYTHON_HOSTRUNNER")) is None:
+        hostrunner = sysconfig.get_config_var("HOSTRUNNER")
+    return hostrunner
+
+
+def is_cross_compiled():
+    return ('_PYTHON_HOST_PLATFORM' in os.environ)
+
+
+def format_resources(use_resources: tuple[str, ...]):
+    use_resources = set(use_resources)
+    all_resources = set(ALL_RESOURCES)
+
+    # Express resources relative to "all"
+    relative_all = ['all']
+    for name in sorted(all_resources - use_resources):
+        relative_all.append(f'-{name}')
+    for name in sorted(use_resources - all_resources):
+        relative_all.append(f'{name}')
+    all_text = ','.join(relative_all)
+    all_text = f"resources: {all_text}"
+
+    # List of enabled resources
+    text = ','.join(sorted(use_resources))
+    text = f"resources ({len(use_resources)}): {text}"
+
+    # Pick the shortest string (prefer relative to all if lengths are equal)
+    if len(all_text) <= len(text):
+        return all_text
+    else:
+        return text
+
+
+def display_header(use_resources: tuple[str, ...],
+                   python_cmd: tuple[str, ...] | None):
     # Print basic platform information
     print("==", platform.python_implementation(), *sys.version.split())
     print("==", platform.platform(aliased=True),
@@ -532,16 +583,42 @@ def display_header(use_resources: tuple[str, ...]):
 
     cpu_count = os.cpu_count()
     if cpu_count:
+        process_cpu_count = os.process_cpu_count()
+        if process_cpu_count and process_cpu_count != cpu_count:
+            cpu_count = f"{process_cpu_count} (process) / {cpu_count} (system)"
         print("== CPU count:", cpu_count)
-    print("== encodings: locale=%s, FS=%s"
+    print("== encodings: locale=%s FS=%s"
           % (locale.getencoding(), sys.getfilesystemencoding()))
 
-
     if use_resources:
-        print(f"== resources ({len(use_resources)}): "
-              f"{', '.join(sorted(use_resources))}")
+        text = format_resources(use_resources)
+        print(f"== {text}")
+    else:
+        print("== resources: all test resources are disabled, "
+              "use -u option to unskip tests")
+
+    cross_compile = is_cross_compiled()
+    if cross_compile:
+        print("== cross compiled: Yes")
+    if python_cmd:
+        cmd = shlex.join(python_cmd)
+        print(f"== host python: {cmd}")
+
+        get_cmd = [*python_cmd, '-m', 'platform']
+        proc = subprocess.run(
+            get_cmd,
+            stdout=subprocess.PIPE,
+            text=True,
+            cwd=os_helper.SAVEDCWD)
+        stdout = proc.stdout.replace('\n', ' ').strip()
+        if stdout:
+            print(f"== host platform: {stdout}")
+        elif proc.returncode:
+            print(f"== host platform: <command failed with exit code {proc.returncode}>")
     else:
-        print("== resources: (all disabled, use -u option)")
+        hostrunner = get_host_runner()
+        if hostrunner:
+            print(f"== host runner: {hostrunner}")
 
     # This makes it easier to remember what to set in your local
     # environment when trying to reproduce a sanitizer failure.
@@ -581,3 +658,24 @@ def cleanup_temp_dir(tmp_dir: StrPath):
         else:
             print("Remove file: %s" % name)
             os_helper.unlink(name)
+
+WINDOWS_STATUS = {
+    0xC0000005: "STATUS_ACCESS_VIOLATION",
+    0xC00000FD: "STATUS_STACK_OVERFLOW",
+    0xC000013A: "STATUS_CONTROL_C_EXIT",
+}
+
+def get_signal_name(exitcode):
+    if exitcode < 0:
+        signum = -exitcode
+        try:
+            return signal.Signals(signum).name
+        except ValueError:
+            pass
+
+    try:
+        return WINDOWS_STATUS[exitcode]
+    except KeyError:
+        pass
+
+    return None
diff --git a/Lib/test/pythoninfo.py b/Lib/test/pythoninfo.py
index 0e7528ef97c5f6..58d906ffc62a53 100644
--- a/Lib/test/pythoninfo.py
+++ b/Lib/test/pythoninfo.py
@@ -239,6 +239,7 @@ def format_attr(attr, value):
         'getresgid',
         'getresuid',
         'getuid',
+        'process_cpu_count',
         'uname',
     ):
         call_func(info_add, 'os.%s' % func, os, func)
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py
index 8d210b198d248d..cecf309dca9194 100644
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -327,11 +327,12 @@ def reference_find(p, s):
             for i in range(len(s)):
                 if s.startswith(p, i):
                     return i
+            if p == '' and s == '':
+                return 0
             return -1
 
-        rr = random.randrange
-        choices = random.choices
-        for _ in range(1000):
+        def check_pattern(rr):
+            choices = random.choices
             p0 = ''.join(choices('abcde', k=rr(10))) * rr(10, 20)
             p = p0[:len(p0) - rr(10)] # pop off some characters
             left = ''.join(choices('abcdef', k=rr(2000)))
@@ -341,6 +342,13 @@ def reference_find(p, s):
                 self.checkequal(reference_find(p, text),
                                 text, 'find', p)
 
+        rr = random.randrange
+        for _ in range(1000):
+            check_pattern(rr)
+
+        # Test that the empty string always works:
+        check_pattern(lambda *args: 0)
+
     def test_find_many_lengths(self):
         haystack_repeats = [a * 10**e for e in range(6) for a in (1,2,5)]
         haystacks = [(n, self.fixtype("abcab"*n + "da")) for n in haystack_repeats]
diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py
index 38d5012ba46c08..900b9c96d08a64 100644
--- a/Lib/test/support/__init__.py
+++ b/Lib/test/support/__init__.py
@@ -436,6 +436,14 @@ def skip_if_sanitizer(reason=None, *, address=False, memory=False, ub=False):
     return unittest.skipIf(skip, reason)
 
 
+def set_sanitizer_env_var(env, option):
+    for name in ('ASAN_OPTIONS', 'MSAN_OPTIONS', 'UBSAN_OPTIONS'):
+        if name in env:
+            env[name] += f':{option}'
+        else:
+            env[name] = option
+
+
 def system_must_validate_cert(f):
     """Skip the test on TLS certificate validation failures."""
     @functools.wraps(f)
diff --git a/Lib/test/support/interpreters.py b/Lib/test/support/interpreters.py
index eeff3abe0324e5..3b501614bc4b4d 100644
--- a/Lib/test/support/interpreters.py
+++ b/Lib/test/support/interpreters.py
@@ -7,7 +7,8 @@
 # aliases:
 from _xxsubinterpreters import is_shareable
 from _xxinterpchannels import (
-    ChannelError, ChannelNotFoundError, ChannelEmptyError,
+    ChannelError, ChannelNotFoundError, ChannelClosedError,
+    ChannelEmptyError, ChannelNotEmptyError,
 )
 
 
@@ -117,10 +118,16 @@ def list_all_channels():
 class _ChannelEnd:
     """The base class for RecvChannel and SendChannel."""
 
-    def __init__(self, id):
-        if not isinstance(id, (int, _channels.ChannelID)):
-            raise TypeError(f'id must be an int, got {id!r}')
-        self._id = id
+    _end = None
+
+    def __init__(self, cid):
+        if self._end == 'send':
+            cid = _channels._channel_id(cid, send=True, force=True)
+        elif self._end == 'recv':
+            cid = _channels._channel_id(cid, recv=True, force=True)
+        else:
+            raise NotImplementedError(self._end)
+        self._id = cid
 
     def __repr__(self):
         return f'{type(self).__name__}(id={int(self._id)})'
@@ -147,6 +154,8 @@ def id(self):
 class RecvChannel(_ChannelEnd):
     """The receiving end of a cross-interpreter channel."""
 
+    _end = 'recv'
+
     def recv(self, *, _sentinel=object(), _delay=10 / 1000):  # 10 milliseconds
         """Return the next object from the channel.
 
@@ -171,10 +180,15 @@ def recv_nowait(self, default=_NOT_SET):
         else:
             return _channels.recv(self._id, default)
 
+    def close(self):
+        _channels.close(self._id, recv=True)
+
 
 class SendChannel(_ChannelEnd):
     """The sending end of a cross-interpreter channel."""
 
+    _end = 'send'
+
     def send(self, obj):
         """Send the object (i.e. its data) to the channel's receiving end.
 
@@ -196,3 +210,10 @@ def send_nowait(self, obj):
         # None.  This should be fixed when channel_send_wait() is added.
         # See bpo-32604 and gh-19829.
         return _channels.send(self._id, obj)
+
+    def close(self):
+        _channels.close(self._id, send=True)
+
+
+# XXX This is causing leaks (gh-110318):
+#_channels._register_end_types(SendChannel, RecvChannel)
diff --git a/Lib/test/test_asyncio/test_events.py b/Lib/test/test_asyncio/test_events.py
index 3ee6565b2b65ad..b25c0975736e20 100644
--- a/Lib/test/test_asyncio/test_events.py
+++ b/Lib/test/test_asyncio/test_events.py
@@ -1693,12 +1693,9 @@ async def main():
                 self.loop.stop()
             return res
 
-        start = time.monotonic()
         t = self.loop.create_task(main())
         self.loop.run_forever()
-        elapsed = time.monotonic() - start
 
-        self.assertLess(elapsed, 0.1)
         self.assertEqual(t.result(), 'cancelled')
         self.assertRaises(asyncio.CancelledError, f.result)
         if ov is not None:
diff --git a/Lib/test/test_asyncio/test_unix_events.py b/Lib/test/test_asyncio/test_unix_events.py
index 7322be597ae2d2..d2c8cba6acfa31 100644
--- a/Lib/test/test_asyncio/test_unix_events.py
+++ b/Lib/test/test_asyncio/test_unix_events.py
@@ -4,6 +4,7 @@
 import errno
 import io
 import multiprocessing
+from multiprocessing.util import _cleanup_tests as multiprocessing_cleanup_tests
 import os
 import pathlib
 import signal
@@ -15,6 +16,7 @@
 import unittest
 from unittest import mock
 import warnings
+
 from test import support
 from test.support import os_helper
 from test.support import socket_helper
@@ -1903,6 +1905,8 @@ async def test_fork_not_share_event_loop(self):
 
     @hashlib_helper.requires_hashdigest('md5')
     def test_fork_signal_handling(self):
+        self.addCleanup(multiprocessing_cleanup_tests)
+
         # Sending signal to the forked process should not affect the parent
         # process
         ctx = multiprocessing.get_context('fork')
@@ -1947,6 +1951,8 @@ async def func():
 
     @hashlib_helper.requires_hashdigest('md5')
     def test_fork_asyncio_run(self):
+        self.addCleanup(multiprocessing_cleanup_tests)
+
         ctx = multiprocessing.get_context('fork')
         manager = ctx.Manager()
         self.addCleanup(manager.shutdown)
@@ -1964,6 +1970,8 @@ async def child_main():
 
     @hashlib_helper.requires_hashdigest('md5')
     def test_fork_asyncio_subprocess(self):
+        self.addCleanup(multiprocessing_cleanup_tests)
+
         ctx = multiprocessing.get_context('fork')
         manager = ctx.Manager()
         self.addCleanup(manager.shutdown)
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
index 33cb248ff6e82b..b7966f8f03875b 100644
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -2203,8 +2203,6 @@ def _run_child(self, child, terminal_input):
         if pid == 0:
             # Child
             try:
-                # Make sure we don't get stuck if there's a problem
-                signal.alarm(2)
                 os.close(r)
                 with open(w, "w") as wpipe:
                     child(wpipe)
diff --git a/Lib/test/test_concurrent_futures/test_deadlock.py b/Lib/test/test_concurrent_futures/test_deadlock.py
index af702542081ad9..3c30c4558c0b3e 100644
--- a/Lib/test/test_concurrent_futures/test_deadlock.py
+++ b/Lib/test/test_concurrent_futures/test_deadlock.py
@@ -286,11 +286,12 @@ def wakeup(self):
                 super().wakeup()
 
             def clear(self):
+                super().clear()
                 try:
                     while True:
                         self._dummy_queue.get_nowait()
                 except queue.Empty:
-                    super().clear()
+                    pass
 
         with (unittest.mock.patch.object(futures.process._ExecutorManagerThread,
                                          'run', mock_run),
diff --git a/Lib/test/test_concurrent_futures/test_thread_pool.py b/Lib/test/test_concurrent_futures/test_thread_pool.py
index 812f989d8f3ad2..5926a632aa4bec 100644
--- a/Lib/test/test_concurrent_futures/test_thread_pool.py
+++ b/Lib/test/test_concurrent_futures/test_thread_pool.py
@@ -25,7 +25,7 @@ def record_finished(n):
 
     def test_default_workers(self):
         executor = self.executor_type()
-        expected = min(32, (os.cpu_count() or 1) + 4)
+        expected = min(32, (os.process_cpu_count() or 1) + 4)
         self.assertEqual(executor._max_workers, expected)
 
     def test_saturation(self):
diff --git a/Lib/test/test_copy.py b/Lib/test/test_copy.py
index c66c6eeb00811e..60735ba89a80ee 100644
--- a/Lib/test/test_copy.py
+++ b/Lib/test/test_copy.py
@@ -936,14 +936,24 @@ def __replace__(self, **changes):
 
     def test_namedtuple(self):
         from collections import namedtuple
-        Point = namedtuple('Point', 'x y', defaults=(0,))
-        p = Point(11, 22)
-        self.assertEqual(copy.replace(p), (11, 22))
-        self.assertEqual(copy.replace(p, x=1), (1, 22))
-        self.assertEqual(copy.replace(p, y=2), (11, 2))
-        self.assertEqual(copy.replace(p, x=1, y=2), (1, 2))
-        with self.assertRaisesRegex(ValueError, 'unexpected field name'):
-            copy.replace(p, x=1, error=2)
+        from typing import NamedTuple
+        PointFromCall = namedtuple('Point', 'x y', defaults=(0,))
+        class PointFromInheritance(PointFromCall):
+            pass
+        class PointFromClass(NamedTuple):
+            x: int
+            y: int = 0
+        for Point in (PointFromCall, PointFromInheritance, PointFromClass):
+            with self.subTest(Point=Point):
+                p = Point(11, 22)
+                self.assertIsInstance(p, Point)
+                self.assertEqual(copy.replace(p), (11, 22))
+                self.assertIsInstance(copy.replace(p), Point)
+                self.assertEqual(copy.replace(p, x=1), (1, 22))
+                self.assertEqual(copy.replace(p, y=2), (11, 2))
+                self.assertEqual(copy.replace(p, x=1, y=2), (1, 2))
+                with self.assertRaisesRegex(ValueError, 'unexpected field name'):
+                    copy.replace(p, x=1, error=2)
 
     def test_dataclass(self):
         from dataclasses import dataclass
diff --git a/Lib/test/test_dataclasses/__init__.py b/Lib/test/test_dataclasses/__init__.py
index 7c07dfc77de208..f629d7bb53959b 100644
--- a/Lib/test/test_dataclasses/__init__.py
+++ b/Lib/test/test_dataclasses/__init__.py
@@ -3965,9 +3965,9 @@ class C:
         self.assertEqual((c1.x, c1.y, c1.z, c1.t), (3, 2, 10, 100))
 
 
-        with self.assertRaisesRegex(ValueError, 'init=False'):
+        with self.assertRaisesRegex(TypeError, 'init=False'):
             replace(c, x=3, z=20, t=50)
-        with self.assertRaisesRegex(ValueError, 'init=False'):
+        with self.assertRaisesRegex(TypeError, 'init=False'):
             replace(c, z=20)
             replace(c, x=3, z=20, t=50)
 
@@ -4020,10 +4020,10 @@ class C:
         self.assertEqual((c1.x, c1.y), (5, 10))
 
         # Trying to replace y is an error.
-        with self.assertRaisesRegex(ValueError, 'init=False'):
+        with self.assertRaisesRegex(TypeError, 'init=False'):
             replace(c, x=2, y=30)
 
-        with self.assertRaisesRegex(ValueError, 'init=False'):
+        with self.assertRaisesRegex(TypeError, 'init=False'):
             replace(c, y=30)
 
     def test_classvar(self):
@@ -4056,8 +4056,8 @@ def __post_init__(self, y):
 
         c = C(1, 10)
         self.assertEqual(c.x, 10)
-        with self.assertRaisesRegex(ValueError, r"InitVar 'y' must be "
-                                    "specified with replace()"):
+        with self.assertRaisesRegex(TypeError, r"InitVar 'y' must be "
+                                    r"specified with replace\(\)"):
             replace(c, x=3)
         c = replace(c, x=3, y=5)
         self.assertEqual(c.x, 15)
diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py
index 7f1a4e665f3b5d..dc476ef83c2519 100644
--- a/Lib/test/test_embed.py
+++ b/Lib/test/test_embed.py
@@ -455,6 +455,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
         'code_debug_ranges': 1,
         'show_ref_count': 0,
         'dump_refs': 0,
+        'dump_refs_file': None,
         'malloc_stats': 0,
 
         'filesystem_encoding': GET_DEFAULT_CONFIG,
@@ -504,6 +505,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
         'run_command': None,
         'run_module': None,
         'run_filename': None,
+        'sys_path_0': None,
 
         '_install_importlib': 1,
         'check_hash_pycs_mode': 'default',
@@ -1131,6 +1133,7 @@ def test_init_run_main(self):
             'program_name': './python3',
             'run_command': code + '\n',
             'parse_argv': 2,
+            'sys_path_0': '',
         }
         self.check_all_configs("test_init_run_main", config, api=API_PYTHON)
 
@@ -1146,6 +1149,7 @@ def test_init_main(self):
             'run_command': code + '\n',
             'parse_argv': 2,
             '_init_main': 0,
+            'sys_path_0': '',
         }
         self.check_all_configs("test_init_main", config,
                                api=API_PYTHON,
diff --git a/Lib/test/test_faulthandler.py b/Lib/test/test_faulthandler.py
index 3c1e8c150ae711..0b8299a32b03c0 100644
--- a/Lib/test/test_faulthandler.py
+++ b/Lib/test/test_faulthandler.py
@@ -35,7 +35,7 @@ def expected_traceback(lineno1, lineno2, header, min_count=1):
         return '^' + regex + '$'
 
 def skip_segfault_on_android(test):
-    # Issue #32138: Raising SIGSEGV on Android may not cause a crash.
+    # gh-76319: Raising SIGSEGV on Android may not cause a crash.
     return unittest.skipIf(is_android,
                            'raising SIGSEGV on Android is unreliable')(test)
 
@@ -67,11 +67,7 @@ def get_output(self, code, filename=None, fd=None):
 
         # Sanitizers must not handle SIGSEGV (ex: for test_enable_fd())
         option = 'handle_segv=0'
-        for name in ('ASAN_OPTIONS', 'MSAN_OPTIONS', 'UBSAN_OPTIONS'):
-            if name in env:
-                env[name] += f':{option}'
-            else:
-                env[name] = option
+        support.set_sanitizer_env_var(env, option)
 
         with support.SuppressCrashReport():
             process = script_helper.spawn_python('-c', code,
diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
index 4f05a149a901b2..dd8c2dd628ee13 100644
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -514,6 +514,54 @@ def test_ast_fstring_empty_format_spec(self):
         self.assertEqual(type(format_spec), ast.JoinedStr)
         self.assertEqual(len(format_spec.values), 0)
 
+    def test_ast_fstring_format_spec(self):
+        expr = "f'{1:{name}}'"
+
+        mod = ast.parse(expr)
+        self.assertEqual(type(mod), ast.Module)
+        self.assertEqual(len(mod.body), 1)
+
+        fstring = mod.body[0].value
+        self.assertEqual(type(fstring), ast.JoinedStr)
+        self.assertEqual(len(fstring.values), 1)
+
+        fv = fstring.values[0]
+        self.assertEqual(type(fv), ast.FormattedValue)
+
+        format_spec = fv.format_spec
+        self.assertEqual(type(format_spec), ast.JoinedStr)
+        self.assertEqual(len(format_spec.values), 1)
+
+        format_spec_value = format_spec.values[0]
+        self.assertEqual(type(format_spec_value), ast.FormattedValue)
+        self.assertEqual(format_spec_value.value.id, 'name')
+
+        expr = "f'{1:{name1}{name2}}'"
+
+        mod = ast.parse(expr)
+        self.assertEqual(type(mod), ast.Module)
+        self.assertEqual(len(mod.body), 1)
+
+        fstring = mod.body[0].value
+        self.assertEqual(type(fstring), ast.JoinedStr)
+        self.assertEqual(len(fstring.values), 1)
+
+        fv = fstring.values[0]
+        self.assertEqual(type(fv), ast.FormattedValue)
+
+        format_spec = fv.format_spec
+        self.assertEqual(type(format_spec), ast.JoinedStr)
+        self.assertEqual(len(format_spec.values), 2)
+
+        format_spec_value = format_spec.values[0]
+        self.assertEqual(type(format_spec_value), ast.FormattedValue)
+        self.assertEqual(format_spec_value.value.id, 'name1')
+
+        format_spec_value = format_spec.values[1]
+        self.assertEqual(type(format_spec_value), ast.FormattedValue)
+        self.assertEqual(format_spec_value.value.id, 'name2')
+
+
     def test_docstring(self):
         def f():
             f'''Not a docstring'''
diff --git a/Lib/test/test_gdb/test_cfunction_full.py b/Lib/test/test_gdb/test_cfunction_full.py
index 3e90cb1d392f49..572cbdab5d77c0 100644
--- a/Lib/test/test_gdb/test_cfunction_full.py
+++ b/Lib/test/test_gdb/test_cfunction_full.py
@@ -18,7 +18,7 @@ def check(self, func_name, cmd):
         gdb_output = self.get_stack_trace(
             cmd,
             breakpoint=func_name,
-            cmds_after_breakpoint=['py-bt-full'],
+            cmds_after_breakpoint=['bt', 'py-bt-full'],
             # bpo-45207: Ignore 'Function "meth_varargs" not
             # defined.' message in stderr.
             ignore_stderr=True,
diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py
index b5eaf824aee706..5971d2e436e4aa 100644
--- a/Lib/test/test_generated_cases.py
+++ b/Lib/test/test_generated_cases.py
@@ -239,16 +239,22 @@ def test_overlap(self):
 
     def test_predictions_and_eval_breaker(self):
         input = """
-        inst(OP1, (--)) {
+        inst(OP1, (arg -- rest)) {
         }
         inst(OP3, (arg -- res)) {
-            DEOPT_IF(xxx, OP1);
+            DEOPT_IF(xxx);
             CHECK_EVAL_BREAKER();
         }
+        family(OP1, INLINE_CACHE_ENTRIES_OP1) = { OP3 };
     """
         output = """
         TARGET(OP1) {
             PREDICTED(OP1);
+            static_assert(INLINE_CACHE_ENTRIES_OP1 == 0, "incorrect cache size");
+            PyObject *arg;
+            PyObject *rest;
+            arg = stack_pointer[-1];
+            stack_pointer[-1] = rest;
             DISPATCH();
         }
 
@@ -371,6 +377,7 @@ def test_macro_instruction(self):
         }
 
         TARGET(OP) {
+            PREDICTED(OP);
             static_assert(INLINE_CACHE_ENTRIES_OP == 5, "incorrect cache size");
             PyObject *right;
             PyObject *left;
diff --git a/Lib/test/test_interpreters.py b/Lib/test/test_interpreters.py
index 9c0dac7d6c61fb..f2ef172d26dfc8 100644
--- a/Lib/test/test_interpreters.py
+++ b/Lib/test/test_interpreters.py
@@ -1,5 +1,7 @@
 import contextlib
+import json
 import os
+import os.path
 import sys
 import threading
 from textwrap import dedent
@@ -9,6 +11,7 @@
 from test import support
 from test.support import import_helper
 from test.support import threading_helper
+from test.support import os_helper
 _interpreters = import_helper.import_module('_xxsubinterpreters')
 _channels = import_helper.import_module('_xxinterpchannels')
 from test.support import interpreters
@@ -65,6 +68,17 @@ def run():
 
 class TestBase(unittest.TestCase):
 
+    def pipe(self):
+        """Return an os.pipe() pair whose ends get closed at test cleanup."""
+        def close_quietly(fd):
+            # The test itself may already have closed this end.
+            with contextlib.suppress(OSError):
+                os.close(fd)
+        read_fd, write_fd = os.pipe()
+        for fd in (read_fd, write_fd):
+            self.addCleanup(close_quietly, fd)
+        return read_fd, write_fd
+
     def tearDown(self):
         clean_up_interpreters()
 
@@ -258,6 +272,16 @@ def test_subinterpreter(self):
             self.assertTrue(interp.is_running())
         self.assertFalse(interp.is_running())
 
+    def test_finished(self):
+        r, w = self.pipe()
+        interp = interpreters.create()
+        interp.run(f"""if True:
+            import os
+            os.write({w}, b'x')
+            """)
+        self.assertFalse(interp.is_running())
+        self.assertEqual(os.read(r, 1), b'x')
+
     def test_from_subinterpreter(self):
         interp = interpreters.create()
         out = _run_output(interp, dedent(f"""
@@ -285,6 +309,31 @@ def test_bad_id(self):
         with self.assertRaises(ValueError):
             interp.is_running()
 
+    def test_with_only_background_threads(self):
+        r_interp, w_interp = self.pipe()
+        r_thread, w_thread = self.pipe()
+
+        DONE = b'D'
+        FINISHED = b'F'
+
+        interp = interpreters.create()
+        interp.run(f"""if True:
+            import os
+            import threading
+
+            def task():
+                v = os.read({r_thread}, 1)
+                assert v == {DONE!r}
+                os.write({w_interp}, {FINISHED!r})
+            t = threading.Thread(target=task)
+            t.start()
+            """)
+        self.assertFalse(interp.is_running())
+
+        os.write(w_thread, DONE)
+        interp.run('t.join()')
+        self.assertEqual(os.read(r_interp, 1), FINISHED)
+
 
 class TestInterpreterClose(TestBase):
 
@@ -386,6 +435,37 @@ def test_still_running(self):
                 interp.close()
             self.assertTrue(interp.is_running())
 
+    def test_subthreads_still_running(self):
+        r_interp, w_interp = self.pipe()
+        r_thread, w_thread = self.pipe()
+
+        FINISHED = b'F'
+
+        interp = interpreters.create()
+        interp.run(f"""if True:
+            import os
+            import threading
+            import time
+
+            done = False
+
+            def notify_fini():
+                global done
+                done = True
+                t.join()
+            threading._register_atexit(notify_fini)
+
+            def task():
+                while not done:
+                    time.sleep(0.1)
+                os.write({w_interp}, {FINISHED!r})
+            t = threading.Thread(target=task)
+            t.start()
+            """)
+        interp.close()
+
+        self.assertEqual(os.read(r_interp, 1), FINISHED)
+
 
 class TestInterpreterRun(TestBase):
 
@@ -462,6 +542,37 @@ def test_bytes_for_script(self):
         with self.assertRaises(TypeError):
             interp.run(b'print("spam")')
 
+    def test_with_background_threads_still_running(self):
+        r_interp, w_interp = self.pipe()
+        r_thread, w_thread = self.pipe()
+
+        RAN = b'R'
+        DONE = b'D'
+        FINISHED = b'F'
+
+        interp = interpreters.create()
+        interp.run(f"""if True:
+            import os
+            import threading
+
+            def task():
+                v = os.read({r_thread}, 1)
+                assert v == {DONE!r}
+                os.write({w_interp}, {FINISHED!r})
+            t = threading.Thread(target=task)
+            t.start()
+            os.write({w_interp}, {RAN!r})
+            """)
+        interp.run(f"""if True:
+            os.write({w_interp}, {RAN!r})
+            """)
+
+        os.write(w_thread, DONE)
+        interp.run('t.join()')
+        self.assertEqual(os.read(r_interp, 1), RAN)
+        self.assertEqual(os.read(r_interp, 1), RAN)
+        self.assertEqual(os.read(r_interp, 1), FINISHED)
+
     # test_xxsubinterpreters covers the remaining Interpreter.run() behavior.
 
 
@@ -488,6 +599,154 @@ def task():
             pass
 
 
+class StartupTests(TestBase):
+
+    # We want to ensure the initial state of subinterpreters
+    # matches expectations.
+
+    _subtest_count = 0
+
+    @contextlib.contextmanager
+    def subTest(self, *args):
+        with super().subTest(*args) as ctx:
+            self._subtest_count += 1
+            try:
+                yield ctx
+            finally:
+                if self._debugged_in_subtest:
+                    if self._subtest_count == 1:
+                        # The first subtest adds a leading newline, so we
+                        # compensate here by not printing a trailing newline.
+                        print('### end subtest debug ###', end='')
+                    else:
+                        print('### end subtest debug ###')
+                self._debugged_in_subtest = False
+
+    def debug(self, msg, *, header=None):
+        if header:
+            self._debug(f'--- {header} ---')
+            if msg:
+                if msg.endswith(os.linesep):
+                    self._debug(msg[:-len(os.linesep)])
+                else:
+                    self._debug(msg)
+                    self._debug('<no newline>')
+            self._debug('------')
+        else:
+            self._debug(msg)
+
+    _debugged = False
+    _debugged_in_subtest = False
+    def _debug(self, msg):
+        """Print *msg*, adding the one-time framing around debug output."""
+        # The very first debug line is preceded by a bare newline.
+        if not self._debugged:
+            print()
+            self._debugged = True
+        # Within a subtest, announce the start of its debug output once.
+        in_subtest = self._subtest is not None
+        if in_subtest and not self._debugged_in_subtest:
+            self._debugged_in_subtest = True
+            print('### start subtest debug ###')
+        print(msg)
+
+    def create_temp_dir(self):
+        import tempfile
+        tmp = tempfile.mkdtemp(prefix='test_interpreters_')
+        tmp = os.path.realpath(tmp)
+        self.addCleanup(os_helper.rmtree, tmp)
+        return tmp
+
+    def write_script(self, *path, text):
+        filename = os.path.join(*path)
+        dirname = os.path.dirname(filename)
+        if dirname:
+            os.makedirs(dirname, exist_ok=True)
+        with open(filename, 'w', encoding='utf-8') as outfile:
+            outfile.write(dedent(text))
+        return filename
+
+    @support.requires_subprocess()
+    def run_python(self, argv, *, cwd=None):
+        # This method is inspired by
+        # EmbeddingTestsMixin.run_embedded_interpreter() in test_embed.py.
+        import shlex
+        import subprocess
+        if isinstance(argv, str):
+            argv = shlex.split(argv)
+        argv = [sys.executable, *argv]
+        try:
+            proc = subprocess.run(
+                argv,
+                cwd=cwd,
+                capture_output=True,
+                text=True,
+            )
+        except Exception as exc:
+            self.debug(f'# cmd: {shlex.join(argv)}')
+            if isinstance(exc, FileNotFoundError) and not exc.filename:
+                if os.path.exists(argv[0]):
+                    exists = 'exists'
+                else:
+                    exists = 'does not exist'
+                self.debug(f'{argv[0]} {exists}')
+            raise  # re-raise
+        assert proc.stderr == '' or proc.returncode != 0, proc.stderr
+        if proc.returncode != 0 and support.verbose:
+            self.debug(f'# python3 {shlex.join(argv[1:])} failed:')
+            self.debug(proc.stdout, header='stdout')
+            self.debug(proc.stderr, header='stderr')
+        self.assertEqual(proc.returncode, 0)
+        self.assertEqual(proc.stderr, '')
+        return proc.stdout
+
+    def test_sys_path_0(self):
+        # The main interpreter's sys.path[0] should be used by subinterpreters.
+        script = '''
+            import sys
+            from test.support import interpreters
+
+            orig = sys.path[0]
+
+            interp = interpreters.create()
+            interp.run(f"""if True:
+                import json
+                import sys
+                print(json.dumps({{
+                    'main': {orig!r},
+                    'sub': sys.path[0],
+                }}, indent=4), flush=True)
+                """)
+            '''
+        # <tmp>/
+        #   pkg/
+        #     __init__.py
+        #     __main__.py
+        #     script.py
+        #   script.py
+        cwd = self.create_temp_dir()
+        self.write_script(cwd, 'pkg', '__init__.py', text='')
+        self.write_script(cwd, 'pkg', '__main__.py', text=script)
+        self.write_script(cwd, 'pkg', 'script.py', text=script)
+        self.write_script(cwd, 'script.py', text=script)
+
+        cases = [
+            ('script.py', cwd),
+            ('-m script', cwd),
+            ('-m pkg', cwd),
+            ('-m pkg.script', cwd),
+            ('-c "import script"', ''),
+        ]
+        for argv, expected in cases:
+            with self.subTest(f'python3 {argv}'):
+                out = self.run_python(argv, cwd=cwd)
+                data = json.loads(out)
+                sp0_main, sp0_sub = data['main'], data['sub']
+                self.assertEqual(sp0_sub, sp0_main)
+                self.assertEqual(sp0_sub, expected)
+        # XXX Also check them all with the -P cmdline flag?
+
+
 class FinalizationTests(TestBase):
 
     def test_gh_109793(self):
@@ -574,6 +833,23 @@ def test_list_all(self):
         after = set(interpreters.list_all_channels())
         self.assertEqual(after, created)
 
+    @unittest.expectedFailure  # Expected to fail; see gh-110318.
+    def test_shareable(self):
+        rch, sch = interpreters.create_channel()
+
+        self.assertTrue(
+            interpreters.is_shareable(rch))
+        self.assertTrue(
+            interpreters.is_shareable(sch))
+
+        sch.send_nowait(rch)  # send both channel ends through the channel
+        sch.send_nowait(sch)
+        rch2 = rch.recv()  # expected back in send order
+        sch2 = rch.recv()
+
+        self.assertEqual(rch2, rch)
+        self.assertEqual(sch2, sch)
+
 
 class TestRecvChannelAttrs(TestBase):
 
diff --git a/Lib/test/test_launcher.py b/Lib/test/test_launcher.py
index 362b507d158288..bcd4ed63bf25a0 100644
--- a/Lib/test/test_launcher.py
+++ b/Lib/test/test_launcher.py
@@ -717,3 +717,25 @@ def test_literal_shebang_invalid_template(self):
             f"{expect} arg1 {script}",
             data["stdout"].strip(),
         )
+
+    def test_shebang_command_in_venv(self):
+        stem = "python-that-is-not-on-path"
+
+        # First ensure that our test name doesn't exist, and the launcher does
+        # not match any installed env
+        with self.script(f'#! /usr/bin/env {stem} arg1') as script:
+            data = self.run_py([script], expect_returncode=103)
+
+        with self.fake_venv() as (venv_exe, env):
+            # Put a real Python (ourselves) on PATH as a distraction.
+            # The active VIRTUAL_ENV should be preferred when the name isn't an
+            # exact match.
+            env["PATH"] = f"{Path(sys.executable).parent};{os.environ['PATH']}"
+
+            with self.script(f'#! /usr/bin/env {stem} arg1') as script:
+                data = self.run_py([script], env=env)
+            self.assertEqual(data["stdout"].strip(), f"{venv_exe} arg1 {script}")
+
+            with self.script(f'#! /usr/bin/env {Path(sys.executable).stem} arg1') as script:
+                data = self.run_py([script], env=env)
+            self.assertEqual(data["stdout"].strip(), f"{sys.executable} arg1 {script}")
diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py
index d91dcdfb0c5fac..3e710d1c6dabe4 100644
--- a/Lib/test/test_ntpath.py
+++ b/Lib/test/test_ntpath.py
@@ -2,6 +2,7 @@
 import ntpath
 import os
 import string
+import subprocess
 import sys
 import unittest
 import warnings
@@ -637,6 +638,48 @@ def test_realpath_cwd(self):
         with os_helper.change_cwd(test_dir_short):
             self.assertPathEqual(test_file_long, ntpath.realpath("file.txt"))
 
+    @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
+    def test_realpath_permission(self):
+        # Test whether python can resolve the real filename of a
+        # shortened file name even if it does not have permission to access it.
+        ABSTFN = ntpath.realpath(os_helper.TESTFN)
+
+        os_helper.unlink(ABSTFN)
+        os_helper.rmtree(ABSTFN)
+        os.mkdir(ABSTFN)
+        self.addCleanup(os_helper.rmtree, ABSTFN)
+
+        test_file = ntpath.join(ABSTFN, "LongFileName123.txt")
+        test_file_short = ntpath.join(ABSTFN, "LONGFI~1.TXT")
+
+        with open(test_file, "wb") as f:
+            f.write(b"content")
+        # Automatic generation of short names may be disabled on
+        # NTFS volumes for the sake of performance.
+        # They're not supported at all on ReFS and exFAT.
+        subprocess.run(
+            # Try to set the short name manually.
+            ['fsutil.exe', 'file', 'setShortName', test_file, 'LONGFI~1.TXT'],
+            creationflags=subprocess.DETACHED_PROCESS
+        )
+
+        try:
+            self.assertPathEqual(test_file, ntpath.realpath(test_file_short))
+        except AssertionError:
+            raise unittest.SkipTest('the filesystem seems to lack support for short filenames')
+
+        # Deny the right to [S]YNCHRONIZE on the file to
+        # force nt._getfinalpathname to fail with ERROR_ACCESS_DENIED.
+        p = subprocess.run(
+            ['icacls.exe', test_file, '/deny', '*S-1-5-32-545:(S)'],
+            creationflags=subprocess.DETACHED_PROCESS
+        )
+
+        if p.returncode:
+            raise unittest.SkipTest('failed to deny access to the test file')
+
+        self.assertPathEqual(test_file, ntpath.realpath(test_file_short))
+
     def test_expandvars(self):
         with os_helper.EnvironmentVarGuard() as env:
             env.clear()
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py
index 66aece2c4b3eb9..c1a78a70c09441 100644
--- a/Lib/test/test_os.py
+++ b/Lib/test/test_os.py
@@ -3996,14 +3996,42 @@ def test_oserror_filename(self):
                     self.fail(f"No exception thrown by {func}")
 
 class CPUCountTests(unittest.TestCase):
+    def check_cpu_count(self, cpus):
+        if cpus is None:
+            self.skipTest("Could not determine the number of CPUs")
+
+        self.assertIsInstance(cpus, int)
+        self.assertGreater(cpus, 0)
+
     def test_cpu_count(self):
         cpus = os.cpu_count()
-        if cpus is not None:
-            self.assertIsInstance(cpus, int)
-            self.assertGreater(cpus, 0)
-        else:
+        self.check_cpu_count(cpus)
+
+    def test_process_cpu_count(self):
+        cpus = os.process_cpu_count()
+        self.check_cpu_count(cpus)  # skips on None, before the comparison
+        self.assertLessEqual(cpus, os.cpu_count())
+
+    @unittest.skipUnless(hasattr(os, 'sched_setaffinity'),
+                         "don't have sched affinity support")
+    def test_process_cpu_count_affinity(self):
+        # process_cpu_count() must follow the process CPU affinity mask.
+        if os.cpu_count() is None:
             self.skipTest("Could not determine the number of CPUs")
 
+        # Drop one CPU from the mask; the original mask is restored at cleanup.
+        mask = os.sched_getaffinity(0)
+        if len(mask) <= 1:
+            self.skipTest(f"sched_getaffinity() returns less than "
+                          f"2 CPUs: {sorted(mask)}")
+        self.addCleanup(os.sched_setaffinity, 0, list(mask))
+        mask.pop()
+        os.sched_setaffinity(0, mask)
+
+        # The process may start with a restricted mask (e.g. taskset, cpuset),
+        # so compare against the new mask size rather than cpu_count() - 1.
+        self.assertEqual(os.process_cpu_count(), len(mask))
+
 
 # FD inheritance check is only useful for systems with process support.
 @support.requires_subprocess()
diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py
index d5a2b175378852..f077825fc7c09a 100644
--- a/Lib/test/test_pathlib.py
+++ b/Lib/test/test_pathlib.py
@@ -11,6 +11,7 @@
 import tempfile
 import unittest
 from unittest import mock
+from urllib.request import pathname2url
 
 from test.support import import_helper
 from test.support import set_recursion_limit
@@ -3568,6 +3569,24 @@ def test_handling_bad_descriptor(self):
                 self.fail("Bad file descriptor not handled.")
             raise
 
+    def test_from_uri(self):
+        P = self.cls
+        self.assertEqual(P.from_uri('file:/foo/bar'), P('/foo/bar'))
+        self.assertEqual(P.from_uri('file://foo/bar'), P('//foo/bar'))
+        self.assertEqual(P.from_uri('file:///foo/bar'), P('/foo/bar'))
+        self.assertEqual(P.from_uri('file:////foo/bar'), P('//foo/bar'))
+        self.assertEqual(P.from_uri('file://localhost/foo/bar'), P('/foo/bar'))
+        self.assertRaises(ValueError, P.from_uri, 'foo/bar')
+        self.assertRaises(ValueError, P.from_uri, '/foo/bar')
+        self.assertRaises(ValueError, P.from_uri, '//foo/bar')
+        self.assertRaises(ValueError, P.from_uri, 'file:foo/bar')
+        self.assertRaises(ValueError, P.from_uri, 'http://foo/bar')
+
+    def test_from_uri_pathname2url(self):
+        P = self.cls
+        self.assertEqual(P.from_uri('file:' + pathname2url('/foo/bar')), P('/foo/bar'))
+        self.assertEqual(P.from_uri('file:' + pathname2url('//foo/bar')), P('//foo/bar'))
+
 
 @only_nt
 class WindowsPathTest(PathTest):
@@ -3687,6 +3706,31 @@ def check():
             env['HOME'] = 'C:\\Users\\eve'
             check()
 
+    def test_from_uri(self):
+        P = self.cls
+        # DOS drive paths
+        self.assertEqual(P.from_uri('file:c:/path/to/file'), P('c:/path/to/file'))
+        self.assertEqual(P.from_uri('file:c|/path/to/file'), P('c:/path/to/file'))
+        self.assertEqual(P.from_uri('file:/c|/path/to/file'), P('c:/path/to/file'))
+        self.assertEqual(P.from_uri('file:///c|/path/to/file'), P('c:/path/to/file'))
+        # UNC paths
+        self.assertEqual(P.from_uri('file://server/path/to/file'), P('//server/path/to/file'))
+        self.assertEqual(P.from_uri('file:////server/path/to/file'), P('//server/path/to/file'))
+        self.assertEqual(P.from_uri('file://///server/path/to/file'), P('//server/path/to/file'))
+        # Localhost paths
+        self.assertEqual(P.from_uri('file://localhost/c:/path/to/file'), P('c:/path/to/file'))
+        self.assertEqual(P.from_uri('file://localhost/c|/path/to/file'), P('c:/path/to/file'))
+        # Invalid paths
+        self.assertRaises(ValueError, P.from_uri, 'foo/bar')
+        self.assertRaises(ValueError, P.from_uri, 'c:/foo/bar')
+        self.assertRaises(ValueError, P.from_uri, '//foo/bar')
+        self.assertRaises(ValueError, P.from_uri, 'file:foo/bar')
+        self.assertRaises(ValueError, P.from_uri, 'http://foo/bar')
+
+    def test_from_uri_pathname2url(self):
+        P = self.cls
+        self.assertEqual(P.from_uri('file:' + pathname2url(r'c:\path\to\file')), P('c:/path/to/file'))
+        self.assertEqual(P.from_uri('file:' + pathname2url(r'\\server\path\to\file')), P('//server/path/to/file'))
 
 
 class PathSubclassTest(PathTest):
diff --git a/Lib/test/test_posix.py b/Lib/test/test_posix.py
index 444f8abe4607b7..9d72dba159c6be 100644
--- a/Lib/test/test_posix.py
+++ b/Lib/test/test_posix.py
@@ -1205,6 +1205,7 @@ def test_sched_getaffinity(self):
     @requires_sched_affinity
     def test_sched_setaffinity(self):
         mask = posix.sched_getaffinity(0)
+        self.addCleanup(posix.sched_setaffinity, 0, list(mask))
         if len(mask) > 1:
             # Empty masks are forbidden
             mask.pop()
diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py
index c98b05abcea98c..de2c4317e71439 100644
--- a/Lib/test/test_regrtest.py
+++ b/Lib/test/test_regrtest.py
@@ -14,6 +14,7 @@
 import random
 import re
 import shlex
+import signal
 import subprocess
 import sys
 import sysconfig
@@ -41,6 +42,8 @@
 EXITCODE_RERUN_FAIL = 5
 EXITCODE_INTERRUPTED = 130
 
+MS_WINDOWS = (sys.platform == 'win32')
+
 TEST_INTERRUPTED = textwrap.dedent("""
     from signal import SIGINT, raise_signal
     try:
@@ -147,6 +150,14 @@ def test_randomize(self):
                 ns = self.parse_args([opt])
                 self.assertTrue(ns.randomize)
 
+        with os_helper.EnvironmentVarGuard() as env:
+            env['SOURCE_DATE_EPOCH'] = '1'
+
+            ns = self.parse_args(['--randomize'])
+            regrtest = main.Regrtest(ns)
+            self.assertFalse(regrtest.randomize)
+            self.assertIsNone(regrtest.random_seed)
+
     def test_randseed(self):
         ns = self.parse_args(['--randseed', '12345'])
         self.assertEqual(ns.random_seed, 12345)
@@ -383,7 +394,7 @@ def check_ci_mode(self, args, use_resources, rerun=True):
         self.assertEqual(regrtest.num_workers, -1)
         self.assertEqual(regrtest.want_rerun, rerun)
         self.assertTrue(regrtest.randomize)
-        self.assertIsNone(regrtest.random_seed)
+        self.assertIsInstance(regrtest.random_seed, int)
         self.assertTrue(regrtest.fail_env_changed)
         self.assertTrue(regrtest.fail_rerun)
         self.assertTrue(regrtest.print_slowest)
@@ -654,7 +665,7 @@ def list_regex(line_format, tests):
     def parse_random_seed(self, output):
         match = self.regex_search(r'Using random seed ([0-9]+)', output)
         randseed = int(match.group(1))
-        self.assertTrue(0 <= randseed <= 100_000_000, randseed)
+        self.assertTrue(0 <= randseed, randseed)
         return randseed
 
     def run_command(self, args, input=None, exitcode=0, **kw):
@@ -663,7 +674,7 @@ def run_command(self, args, input=None, exitcode=0, **kw):
         if 'stderr' not in kw:
             kw['stderr'] = subprocess.STDOUT
         proc = subprocess.run(args,
-                              universal_newlines=True,
+                              text=True,
                               input=input,
                               stdout=subprocess.PIPE,
                               **kw)
@@ -745,8 +756,8 @@ def check_output(self, output):
         self.check_executed_tests(output, self.tests,
                                   randomize=True, stats=len(self.tests))
 
-    def run_tests(self, args):
-        output = self.run_python(args)
+    def run_tests(self, args, env=None):
+        output = self.run_python(args, env=env)
         self.check_output(output)
 
     def test_script_regrtest(self):
@@ -787,14 +798,6 @@ def test_script_autotest(self):
         args = [*self.python_args, script, *self.regrtest_args, *self.tests]
         self.run_tests(args)
 
-    @unittest.skipUnless(sysconfig.is_python_build(),
-                         'run_tests.py script is not installed')
-    def test_tools_script_run_tests(self):
-        # Tools/scripts/run_tests.py
-        script = os.path.join(ROOT_DIR, 'Tools', 'scripts', 'run_tests.py')
-        args = [script, *self.regrtest_args, *self.tests]
-        self.run_tests(args)
-
     def run_batch(self, *args):
         proc = self.run_command(args)
         self.check_output(proc.stdout)
@@ -949,6 +952,10 @@ def test_random(self):
         test_random2 = int(match.group(1))
         self.assertEqual(test_random2, test_random)
 
+        # check that random.seed is used by default
+        output = self.run_tests(test, exitcode=EXITCODE_NO_TESTS_RAN)
+        self.assertIsInstance(self.parse_random_seed(output), int)
+
     def test_fromfile(self):
         # test --fromfile
         tests = [self.create_test() for index in range(5)]
@@ -2031,6 +2038,45 @@ def test_add_python_opts(self):
             with self.subTest(opt=opt):
                 self.check_add_python_opts(opt)
 
+    # gh-76319: Raising SIGSEGV on Android may not cause a crash.
+    @unittest.skipIf(support.is_android,
+                     'raising SIGSEGV on Android is unreliable')
+    def test_worker_output_on_failure(self):
+        try:
+            from faulthandler import _sigsegv
+        except ImportError:
+            self.skipTest("need faulthandler._sigsegv")
+
+        code = textwrap.dedent(r"""
+            import faulthandler
+            import unittest
+            from test import support
+
+            class CrashTests(unittest.TestCase):
+                def test_crash(self):
+                    print("just before crash!", flush=True)
+
+                    with support.SuppressCrashReport():
+                        faulthandler._sigsegv(True)
+        """)
+        testname = self.create_test(code=code)
+
+        # Sanitizers must not handle SIGSEGV (ex: for test_enable_fd())
+        env = dict(os.environ)
+        option = 'handle_segv=0'
+        support.set_sanitizer_env_var(env, option)
+
+        output = self.run_tests("-j1", testname,
+                                exitcode=EXITCODE_BAD_TEST,
+                                env=env)
+        self.check_executed_tests(output, testname,
+                                  failed=[testname],
+                                  stats=0, parallel=True)
+        if not MS_WINDOWS:
+            exitcode = -int(signal.SIGSEGV)
+            self.assertIn(f"Exit code {exitcode} (SIGSEGV)", output)
+        self.check_line(output, "just before crash!", full=True, regex=False)
+
 
 class TestUtils(unittest.TestCase):
     def test_format_duration(self):
@@ -2066,6 +2112,35 @@ def test_normalize_test_name(self):
         self.assertIsNone(normalize('setUpModule (test.test_x)', is_error=True))
         self.assertIsNone(normalize('tearDownModule (test.test_module)', is_error=True))
 
+    def test_get_signal_name(self):
+        for exitcode, expected in (
+            (-int(signal.SIGINT), 'SIGINT'),
+            (-int(signal.SIGSEGV), 'SIGSEGV'),
+            (3221225477, "STATUS_ACCESS_VIOLATION"),
+            (0xC00000FD, "STATUS_STACK_OVERFLOW"),
+        ):
+            self.assertEqual(utils.get_signal_name(exitcode), expected, exitcode)
+
+    def test_format_resources(self):
+        format_resources = utils.format_resources
+        ALL_RESOURCES = utils.ALL_RESOURCES
+        self.assertEqual(
+            format_resources(("network",)),
+            'resources (1): network')
+        self.assertEqual(
+            format_resources(("audio", "decimal", "network")),
+            'resources (3): audio,decimal,network')
+        self.assertEqual(
+            format_resources(ALL_RESOURCES),
+            'resources: all')
+        self.assertEqual(
+            format_resources(tuple(name for name in ALL_RESOURCES
+                                   if name != "cpu")),
+            'resources: all,-cpu')
+        self.assertEqual(
+            format_resources((*ALL_RESOURCES, "tzdata")),
+            'resources: all,tzdata')
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py
index a2ca4df135846f..d231e66b7b889f 100644
--- a/Lib/test/test_shutil.py
+++ b/Lib/test/test_shutil.py
@@ -2067,6 +2067,14 @@ def setUp(self):
         self.curdir = os.curdir
         self.ext = ".EXE"
 
+    def to_text_type(self, s):
+        '''
+        In this class we're testing with str, so convert s to a str
+        '''
+        if isinstance(s, bytes):
+            return s.decode()
+        return s
+
     def test_basic(self):
         # Given an EXE in a directory, it should be returned.
         rv = shutil.which(self.file, path=self.dir)
@@ -2254,9 +2262,9 @@ def test_empty_path_no_PATH(self):
 
     @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows')
     def test_pathext(self):
-        ext = ".xyz"
+        ext = self.to_text_type(".xyz")
         temp_filexyz = tempfile.NamedTemporaryFile(dir=self.temp_dir,
-                                                   prefix="Tmp2", suffix=ext)
+                                                   prefix=self.to_text_type("Tmp2"), suffix=ext)
         os.chmod(temp_filexyz.name, stat.S_IXUSR)
         self.addCleanup(temp_filexyz.close)
 
@@ -2265,16 +2273,16 @@ def test_pathext(self):
         program = os.path.splitext(program)[0]
 
         with os_helper.EnvironmentVarGuard() as env:
-            env['PATHEXT'] = ext
+            env['PATHEXT'] = ext if isinstance(ext, str) else ext.decode()
             rv = shutil.which(program, path=self.temp_dir)
             self.assertEqual(rv, temp_filexyz.name)
 
     # Issue 40592: See https://bugs.python.org/issue40592
     @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows')
     def test_pathext_with_empty_str(self):
-        ext = ".xyz"
+        ext = self.to_text_type(".xyz")
         temp_filexyz = tempfile.NamedTemporaryFile(dir=self.temp_dir,
-                                                   prefix="Tmp2", suffix=ext)
+                                                   prefix=self.to_text_type("Tmp2"), suffix=ext)
         self.addCleanup(temp_filexyz.close)
 
         # strip path and extension
@@ -2282,7 +2290,7 @@ def test_pathext_with_empty_str(self):
         program = os.path.splitext(program)[0]
 
         with os_helper.EnvironmentVarGuard() as env:
-            env['PATHEXT'] = f"{ext};"  # note the ;
+            env['PATHEXT'] = f"{ext if isinstance(ext, str) else ext.decode()};"  # note the ;
             rv = shutil.which(program, path=self.temp_dir)
             self.assertEqual(rv, temp_filexyz.name)
 
@@ -2290,13 +2298,14 @@ def test_pathext_with_empty_str(self):
     @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows')
     def test_pathext_applied_on_files_in_path(self):
         with os_helper.EnvironmentVarGuard() as env:
-            env["PATH"] = self.temp_dir
+            env["PATH"] = self.temp_dir if isinstance(self.temp_dir, str) else self.temp_dir.decode()
             env["PATHEXT"] = ".test"
 
-            test_path = pathlib.Path(self.temp_dir) / "test_program.test"
-            test_path.touch(mode=0o755)
+            test_path = os.path.join(self.temp_dir, self.to_text_type("test_program.test"))
+            open(test_path, 'w').close()
+            os.chmod(test_path, 0o755)
 
-            self.assertEqual(shutil.which("test_program"), str(test_path))
+            self.assertEqual(shutil.which(self.to_text_type("test_program")), test_path)
 
     # See GH-75586
     @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows')
@@ -2312,6 +2321,50 @@ def test_win_path_needs_curdir(self):
             self.assertFalse(shutil._win_path_needs_curdir('dontcare', os.X_OK))
             need_curdir_mock.assert_called_once_with('dontcare')
 
+    # See GH-109590
+    @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows')
+    def test_pathext_preferred_for_execute(self):
+        with os_helper.EnvironmentVarGuard() as env:
+            env["PATH"] = self.temp_dir if isinstance(self.temp_dir, str) else self.temp_dir.decode()
+            env["PATHEXT"] = ".test"
+
+            exe = os.path.join(self.temp_dir, self.to_text_type("test.exe"))
+            open(exe, 'w').close()
+            os.chmod(exe, 0o755)
+
+            # default behavior allows a direct match if nothing in PATHEXT matches
+            self.assertEqual(shutil.which(self.to_text_type("test.exe")), exe)
+
+            dot_test = os.path.join(self.temp_dir, self.to_text_type("test.exe.test"))
+            open(dot_test, 'w').close()
+            os.chmod(dot_test, 0o755)
+
+            # now we have a PATHEXT match, so it takes precedence
+            self.assertEqual(shutil.which(self.to_text_type("test.exe")), dot_test)
+
+            # but if we don't use os.X_OK we don't change the order based off PATHEXT
+            # and therefore get the direct match.
+            self.assertEqual(shutil.which(self.to_text_type("test.exe"), mode=os.F_OK), exe)
+
+    # See GH-109590
+    @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows')
+    def test_pathext_given_extension_preferred(self):
+        with os_helper.EnvironmentVarGuard() as env:
+            env["PATH"] = self.temp_dir if isinstance(self.temp_dir, str) else self.temp_dir.decode()
+            env["PATHEXT"] = ".exe2;.exe"
+
+            exe = os.path.join(self.temp_dir, self.to_text_type("test.exe"))
+            open(exe, 'w').close()
+            os.chmod(exe, 0o755)
+
+            exe2 = os.path.join(self.temp_dir, self.to_text_type("test.exe2"))
+            open(exe2, 'w').close()
+            os.chmod(exe2, 0o755)
+
+            # even though .exe2 is preferred in PATHEXT, we matched directly to test.exe
+            self.assertEqual(shutil.which(self.to_text_type("test.exe")), exe)
+            self.assertEqual(shutil.which(self.to_text_type("test")), exe2)
+
 
 class TestWhichBytes(TestWhich):
     def setUp(self):
@@ -2319,9 +2372,18 @@ def setUp(self):
         self.dir = os.fsencode(self.dir)
         self.file = os.fsencode(self.file)
         self.temp_file.name = os.fsencode(self.temp_file.name)
+        self.temp_dir = os.fsencode(self.temp_dir)
         self.curdir = os.fsencode(self.curdir)
         self.ext = os.fsencode(self.ext)
 
+    def to_text_type(self, s):
+        '''
+        In this class we're testing with bytes, so convert s to bytes
+        '''
+        if isinstance(s, str):
+            return s.encode()
+        return s
+
 
 class TestMove(BaseTest, unittest.TestCase):
 
diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py
index 99c4c5cbc4902d..09605f7e774dda 100644
--- a/Lib/test/test_socket.py
+++ b/Lib/test/test_socket.py
@@ -5356,6 +5356,7 @@ def test_create_connection_timeout(self):
 
 
 class NetworkConnectionAttributesTest(SocketTCPTest, ThreadableTest):
+    cli = None
 
     def __init__(self, methodName='runTest'):
         SocketTCPTest.__init__(self, methodName=methodName)
@@ -5365,7 +5366,8 @@ def clientSetUp(self):
         self.source_port = socket_helper.find_unused_port()
 
     def clientTearDown(self):
-        self.cli.close()
+        if self.cli is not None:
+            self.cli.close()
         self.cli = None
         ThreadableTest.clientTearDown(self)
 
diff --git a/Lib/test/test_socketserver.py b/Lib/test/test_socketserver.py
index c81d559cde315d..0f62f9eb200e42 100644
--- a/Lib/test/test_socketserver.py
+++ b/Lib/test/test_socketserver.py
@@ -32,11 +32,6 @@
 HAVE_FORKING = test.support.has_fork_support
 requires_forking = unittest.skipUnless(HAVE_FORKING, 'requires forking')
 
-def signal_alarm(n):
-    """Call signal.alarm when it exists (i.e. not on Windows)."""
-    if hasattr(signal, 'alarm'):
-        signal.alarm(n)
-
 # Remember real select() to avoid interferences with mocking
 _real_select = select.select
 
@@ -68,12 +63,10 @@ class SocketServerTest(unittest.TestCase):
     """Test all socket servers."""
 
     def setUp(self):
-        signal_alarm(60)  # Kill deadlocks after 60 seconds.
         self.port_seed = 0
         self.test_files = []
 
     def tearDown(self):
-        signal_alarm(0)  # Didn't deadlock.
         reap_children()
 
         for fn in self.test_files:
diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py
index f9b0ac2ad7b116..b24fc3c3d077fe 100644
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -2454,6 +2454,11 @@ def f(x):
             data = random.choices(range(100), k=k)
             q1, q2, q3 = quantiles(data, method='inclusive')
             self.assertEqual(q2, statistics.median(data))
+        # Base case with a single data point:  When estimating quantiles from
+        # a sample, we want to be able to add one sample point at a time,
+        # getting increasingly better estimates.
+        self.assertEqual(quantiles([10], n=4), [10.0, 10.0, 10.0])
+        self.assertEqual(quantiles([10], n=4, method='exclusive'), [10.0, 10.0, 10.0])
 
     def test_equal_inputs(self):
         quantiles = statistics.quantiles
@@ -2504,7 +2509,7 @@ def test_error_cases(self):
         with self.assertRaises(ValueError):
             quantiles([10, 20, 30], method='X') # method is unknown
         with self.assertRaises(StatisticsError):
-            quantiles([10], n=4)                # not enough data points
+            quantiles([], n=4)                  # not enough data points
         with self.assertRaises(TypeError):
             quantiles([10, None, 30], n=4)      # data is non-numeric
 
diff --git a/Lib/test/test_structseq.py b/Lib/test/test_structseq.py
index a9fe193028ebe4..6aec63e2603412 100644
--- a/Lib/test/test_structseq.py
+++ b/Lib/test/test_structseq.py
@@ -1,4 +1,7 @@
+import copy
 import os
+import pickle
+import re
 import time
 import unittest
 
@@ -89,10 +92,69 @@ def test_constructor(self):
         self.assertRaises(TypeError, t, "123")
         self.assertRaises(TypeError, t, "123", dict={})
         self.assertRaises(TypeError, t, "123456789", dict=None)
+        self.assertRaises(TypeError, t, seq="123456789", dict={})
+
+        self.assertEqual(t("123456789"), tuple("123456789"))
+        self.assertEqual(t("123456789", {}), tuple("123456789"))
+        self.assertEqual(t("123456789", dict={}), tuple("123456789"))
+        self.assertEqual(t(sequence="123456789", dict={}), tuple("123456789"))
+
+        self.assertEqual(t("1234567890"), tuple("123456789"))
+        self.assertEqual(t("1234567890").tm_zone, "0")
+        self.assertEqual(t("123456789", {"tm_zone": "some zone"}), tuple("123456789"))
+        self.assertEqual(t("123456789", {"tm_zone": "some zone"}).tm_zone, "some zone")
 
         s = "123456789"
         self.assertEqual("".join(t(s)), s)
 
+    def test_constructor_with_duplicate_fields(self):
+        t = time.struct_time
+
+        error_message = re.escape("got duplicate or unexpected field name(s)")
+        with self.assertRaisesRegex(TypeError, error_message):
+            t("1234567890", dict={"tm_zone": "some zone"})
+        with self.assertRaisesRegex(TypeError, error_message):
+            t("1234567890", dict={"tm_zone": "some zone", "tm_mon": 1})
+        with self.assertRaisesRegex(TypeError, error_message):
+            t("1234567890", dict={"error": 0, "tm_zone": "some zone"})
+        with self.assertRaisesRegex(TypeError, error_message):
+            t("1234567890", dict={"error": 0, "tm_zone": "some zone", "tm_mon": 1})
+
+    def test_constructor_with_duplicate_unnamed_fields(self):
+        assert os.stat_result.n_unnamed_fields > 0
+        n_visible_fields = os.stat_result.n_sequence_fields
+
+        r = os.stat_result(range(n_visible_fields), {'st_atime': -1.0})
+        self.assertEqual(r.st_atime, -1.0)
+        self.assertEqual(r, tuple(range(n_visible_fields)))
+
+        r = os.stat_result((*range(n_visible_fields), -1.0))
+        self.assertEqual(r.st_atime, -1.0)
+        self.assertEqual(r, tuple(range(n_visible_fields)))
+
+        with self.assertRaisesRegex(TypeError,
+                                    re.escape("got duplicate or unexpected field name(s)")):
+            os.stat_result((*range(n_visible_fields), -1.0), {'st_atime': -1.0})
+
+    def test_constructor_with_unknown_fields(self):
+        t = time.struct_time
+
+        error_message = re.escape("got duplicate or unexpected field name(s)")
+        with self.assertRaisesRegex(TypeError, error_message):
+            t("123456789", dict={"tm_year": 0})
+        with self.assertRaisesRegex(TypeError, error_message):
+            t("123456789", dict={"tm_year": 0, "tm_mon": 1})
+        with self.assertRaisesRegex(TypeError, error_message):
+            t("123456789", dict={"tm_zone": "some zone", "tm_mon": 1})
+        with self.assertRaisesRegex(TypeError, error_message):
+            t("123456789", dict={"tm_zone": "some zone", "error": 0})
+        with self.assertRaisesRegex(TypeError, error_message):
+            t("123456789", dict={"error": 0, "tm_zone": "some zone", "tm_mon": 1})
+        with self.assertRaisesRegex(TypeError, error_message):
+            t("123456789", dict={"error": 0})
+        with self.assertRaisesRegex(TypeError, error_message):
+            t("123456789", dict={"tm_zone": "some zone", "error": 0})
+
     def test_eviltuple(self):
         class Exc(Exception):
             pass
@@ -106,9 +168,78 @@ def __len__(self):
 
         self.assertRaises(Exc, time.struct_time, C())
 
-    def test_reduce(self):
+    def test_pickling(self):
         t = time.gmtime()
-        x = t.__reduce__()
+        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
+            p = pickle.dumps(t, proto)
+            t2 = pickle.loads(p)
+            self.assertEqual(t2.__class__, t.__class__)
+            self.assertEqual(t2, t)
+            self.assertEqual(t2.tm_year, t.tm_year)
+            self.assertEqual(t2.tm_zone, t.tm_zone)
+
+    def test_pickling_with_unnamed_fields(self):
+        assert os.stat_result.n_unnamed_fields > 0
+
+        r = os.stat_result(range(os.stat_result.n_sequence_fields),
+                           {'st_atime': 1.0, 'st_atime_ns': 2.0})
+        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
+            p = pickle.dumps(r, proto)
+            r2 = pickle.loads(p)
+            self.assertEqual(r2.__class__, r.__class__)
+            self.assertEqual(r2, r)
+            self.assertEqual(r2.st_mode, r.st_mode)
+            self.assertEqual(r2.st_atime, r.st_atime)
+            self.assertEqual(r2.st_atime_ns, r.st_atime_ns)
+
+    def test_copying(self):
+        n_fields = time.struct_time.n_fields
+        t = time.struct_time([[i] for i in range(n_fields)])
+
+        t2 = copy.copy(t)
+        self.assertEqual(t2.__class__, t.__class__)
+        self.assertEqual(t2, t)
+        self.assertEqual(t2.tm_year, t.tm_year)
+        self.assertEqual(t2.tm_zone, t.tm_zone)
+        self.assertIs(t2[0], t[0])
+        self.assertIs(t2.tm_year, t.tm_year)
+
+        t3 = copy.deepcopy(t)
+        self.assertEqual(t3.__class__, t.__class__)
+        self.assertEqual(t3, t)
+        self.assertEqual(t3.tm_year, t.tm_year)
+        self.assertEqual(t3.tm_zone, t.tm_zone)
+        self.assertIsNot(t3[0], t[0])
+        self.assertIsNot(t3.tm_year, t.tm_year)
+
+    def test_copying_with_unnamed_fields(self):
+        assert os.stat_result.n_unnamed_fields > 0
+
+        n_sequence_fields = os.stat_result.n_sequence_fields
+        r = os.stat_result([[i] for i in range(n_sequence_fields)],
+                           {'st_atime': [1.0], 'st_atime_ns': [2.0]})
+
+        r2 = copy.copy(r)
+        self.assertEqual(r2.__class__, r.__class__)
+        self.assertEqual(r2, r)
+        self.assertEqual(r2.st_mode, r.st_mode)
+        self.assertEqual(r2.st_atime, r.st_atime)
+        self.assertEqual(r2.st_atime_ns, r.st_atime_ns)
+        self.assertIs(r2[0], r[0])
+        self.assertIs(r2.st_mode, r.st_mode)
+        self.assertIs(r2.st_atime, r.st_atime)
+        self.assertIs(r2.st_atime_ns, r.st_atime_ns)
+
+        r3 = copy.deepcopy(r)
+        self.assertEqual(r3.__class__, r.__class__)
+        self.assertEqual(r3, r)
+        self.assertEqual(r3.st_mode, r.st_mode)
+        self.assertEqual(r3.st_atime, r.st_atime)
+        self.assertEqual(r3.st_atime_ns, r.st_atime_ns)
+        self.assertIsNot(r3[0], r[0])
+        self.assertIsNot(r3.st_mode, r.st_mode)
+        self.assertIsNot(r3.st_atime, r.st_atime)
+        self.assertIsNot(r3.st_atime_ns, r.st_atime_ns)
 
     def test_extended_getslice(self):
         # Test extended slicing by comparing with list slicing.
@@ -133,6 +264,84 @@ def test_match_args_with_unnamed_fields(self):
         self.assertEqual(os.stat_result.n_unnamed_fields, 3)
         self.assertEqual(os.stat_result.__match_args__, expected_args)
 
+    def test_copy_replace_all_fields_visible(self):
+        assert os.times_result.n_unnamed_fields == 0
+        assert os.times_result.n_sequence_fields == os.times_result.n_fields
+
+        t = os.times()
+
+        # visible fields
+        self.assertEqual(copy.replace(t), t)
+        self.assertIsInstance(copy.replace(t), os.times_result)
+        self.assertEqual(copy.replace(t, user=1.5), (1.5, *t[1:]))
+        self.assertEqual(copy.replace(t, system=2.5), (t[0], 2.5, *t[2:]))
+        self.assertEqual(copy.replace(t, user=1.5, system=2.5), (1.5, 2.5, *t[2:]))
+
+        # unknown fields
+        with self.assertRaisesRegex(TypeError, 'unexpected field name'):
+            copy.replace(t, error=-1)
+        with self.assertRaisesRegex(TypeError, 'unexpected field name'):
+            copy.replace(t, user=1, error=-1)
+
+    def test_copy_replace_with_invisible_fields(self):
+        assert time.struct_time.n_unnamed_fields == 0
+        assert time.struct_time.n_sequence_fields < time.struct_time.n_fields
+
+        t = time.gmtime(0)
+
+        # visible fields
+        t2 = copy.replace(t)
+        self.assertEqual(t2, (1970, 1, 1, 0, 0, 0, 3, 1, 0))
+        self.assertIsInstance(t2, time.struct_time)
+        t3 = copy.replace(t, tm_year=2000)
+        self.assertEqual(t3, (2000, 1, 1, 0, 0, 0, 3, 1, 0))
+        self.assertEqual(t3.tm_year, 2000)
+        t4 = copy.replace(t, tm_mon=2)
+        self.assertEqual(t4, (1970, 2, 1, 0, 0, 0, 3, 1, 0))
+        self.assertEqual(t4.tm_mon, 2)
+        t5 = copy.replace(t, tm_year=2000, tm_mon=2)
+        self.assertEqual(t5, (2000, 2, 1, 0, 0, 0, 3, 1, 0))
+        self.assertEqual(t5.tm_year, 2000)
+        self.assertEqual(t5.tm_mon, 2)
+
+        # named invisible fields
+        self.assertTrue(hasattr(t, 'tm_zone'), f"{t} has no attribute 'tm_zone'")
+        with self.assertRaisesRegex(AttributeError, 'readonly attribute'):
+            t.tm_zone = 'some other zone'
+        self.assertEqual(t2.tm_zone, t.tm_zone)
+        self.assertEqual(t3.tm_zone, t.tm_zone)
+        self.assertEqual(t4.tm_zone, t.tm_zone)
+        t6 = copy.replace(t, tm_zone='some other zone')
+        self.assertEqual(t, t6)
+        self.assertEqual(t6.tm_zone, 'some other zone')
+        t7 = copy.replace(t, tm_year=2000, tm_zone='some other zone')
+        self.assertEqual(t7, (2000, 1, 1, 0, 0, 0, 3, 1, 0))
+        self.assertEqual(t7.tm_year, 2000)
+        self.assertEqual(t7.tm_zone, 'some other zone')
+
+        # unknown fields
+        with self.assertRaisesRegex(TypeError, 'unexpected field name'):
+            copy.replace(t, error=2)
+        with self.assertRaisesRegex(TypeError, 'unexpected field name'):
+            copy.replace(t, tm_year=2000, error=2)
+        with self.assertRaisesRegex(TypeError, 'unexpected field name'):
+            copy.replace(t, tm_zone='some other zone', error=2)
+
+    def test_copy_replace_with_unnamed_fields(self):
+        assert os.stat_result.n_unnamed_fields > 0
+
+        r = os.stat_result(range(os.stat_result.n_sequence_fields))
+
+        error_message = re.escape('__replace__() is not supported')
+        with self.assertRaisesRegex(TypeError, error_message):
+            copy.replace(r)
+        with self.assertRaisesRegex(TypeError, error_message):
+            copy.replace(r, st_mode=1)
+        with self.assertRaisesRegex(TypeError, error_message):
+            copy.replace(r, error=2)
+        with self.assertRaisesRegex(TypeError, error_message):
+            copy.replace(r, st_mode=1, error=2)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Lib/test/test_support.py b/Lib/test/test_support.py
index 902bec78451307..97de81677b10bc 100644
--- a/Lib/test/test_support.py
+++ b/Lib/test/test_support.py
@@ -832,7 +832,7 @@ def test_copy_python_src_ignore(self):
         self.assertEqual(support.copy_python_src_ignore(path, os.listdir(path)),
                          ignored | {'build', 'venv'})
 
-        # An other directory
+        # Another directory
         path = os.path.join(src_dir, 'Objects')
         self.assertEqual(support.copy_python_src_ignore(path, os.listdir(path)),
                          ignored)
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 16050171ad139d..ae241d7a502749 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -5,6 +5,7 @@
 import locale
 import operator
 import os
+import random
 import struct
 import subprocess
 import sys
@@ -30,10 +31,6 @@ def requires_subinterpreters(meth):
                            'subinterpreters required')(meth)
 
 
-# count the number of test runs, used to create unique
-# strings to intern in test_intern()
-INTERN_NUMRUNS = 0
-
 DICT_KEY_STRUCT_FORMAT = 'n2BI2n'
 
 class DisplayHookTest(unittest.TestCase):
@@ -696,10 +693,8 @@ def test_43581(self):
         self.assertEqual(sys.__stdout__.encoding, sys.__stderr__.encoding)
 
     def test_intern(self):
-        global INTERN_NUMRUNS
-        INTERN_NUMRUNS += 1
         self.assertRaises(TypeError, sys.intern)
-        s = "never interned before" + str(INTERN_NUMRUNS)
+        s = "never interned before" + str(random.randrange(0, 10**9))
         self.assertTrue(sys.intern(s) is s)
         s2 = s.swapcase().swapcase()
         self.assertTrue(sys.intern(s2) is s)
@@ -717,9 +712,7 @@ def __hash__(self):
 
     @requires_subinterpreters
     def test_subinterp_intern_dynamically_allocated(self):
-        global INTERN_NUMRUNS
-        INTERN_NUMRUNS += 1
-        s = "never interned before" + str(INTERN_NUMRUNS)
+        s = "never interned before" + str(random.randrange(0, 10**9))
         t = sys.intern(s)
         self.assertIs(t, s)
 
diff --git a/Lib/test/test_sysconfig.py b/Lib/test/test_sysconfig.py
index b6dbf3d52cb4c3..a077ac5349fdc6 100644
--- a/Lib/test/test_sysconfig.py
+++ b/Lib/test/test_sysconfig.py
@@ -17,7 +17,9 @@
                        get_path, get_path_names, _INSTALL_SCHEMES,
                        get_default_scheme, get_scheme_names, get_config_var,
                        _expand_vars, _get_preferred_schemes, _main)
+import _imp
 import _osx_support
+import _sysconfig
 
 
 HAS_USER_BASE = sysconfig._HAS_USER_BASE
@@ -394,6 +396,24 @@ def test_ldshared_value(self):
 
         self.assertIn(ldflags, ldshared)
 
+    @unittest.skipIf(not _imp.extension_suffixes(), "stub loader has no suffixes")
+    def test_soabi(self):
+        soabi = sysconfig.get_config_var('SOABI')
+        self.assertIn(soabi, _imp.extension_suffixes()[0])
+
+    def test_library(self):
+        library = sysconfig.get_config_var('LIBRARY')
+        ldlibrary = sysconfig.get_config_var('LDLIBRARY')
+        major, minor = sys.version_info[:2]
+        if sys.platform == 'win32':
+            self.assertTrue(library.startswith(f'python{major}{minor}'))
+            self.assertTrue(library.endswith('.dll'))
+            self.assertEqual(library, ldlibrary)
+        else:
+            self.assertTrue(library.startswith(f'libpython{major}.{minor}'))
+            self.assertTrue(library.endswith('.a'))
+            self.assertTrue(ldlibrary.startswith(f'libpython{major}.{minor}'))
+
     @unittest.skipUnless(sys.platform == "darwin", "test only relevant on MacOSX")
     @requires_subprocess()
     def test_platform_in_subprocess(self):
@@ -472,10 +492,8 @@ def test_srcdir_independent_of_cwd(self):
 
     @unittest.skipIf(sysconfig.get_config_var('EXT_SUFFIX') is None,
                      'EXT_SUFFIX required for this test')
+    @unittest.skipIf(not _imp.extension_suffixes(), "stub loader has no suffixes")
     def test_EXT_SUFFIX_in_vars(self):
-        import _imp
-        if not _imp.extension_suffixes():
-            self.skipTest("stub loader has no suffixes")
         vars = sysconfig.get_config_vars()
         self.assertEqual(vars['EXT_SUFFIX'], _imp.extension_suffixes()[0])
 
diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py
index 13bfacbac83f13..f8b81942cf1732 100644
--- a/Lib/test/test_threading.py
+++ b/Lib/test/test_threading.py
@@ -26,6 +26,11 @@
 from test import lock_tests
 from test import support
 
+try:
+    from test.support import interpreters
+except ModuleNotFoundError:
+    interpreters = None
+
 threading_helper.requires_working_threading(module=True)
 
 # Between fork() and exec(), only async-safe functions are allowed (issues
@@ -52,6 +57,12 @@ def skip_unless_reliable_fork(test):
     return test
 
 
+def requires_subinterpreters(meth):
+    """Decorator to skip a test if subinterpreters are not supported."""
+    return unittest.skipIf(interpreters is None,
+                           'subinterpreters required')(meth)
+
+
 def restore_default_excepthook(testcase):
     testcase.addCleanup(setattr, threading, 'excepthook', threading.excepthook)
     threading.excepthook = threading.__excepthook__
@@ -1311,6 +1322,44 @@ def f():
         # The thread was joined properly.
         self.assertEqual(os.read(r, 1), b"x")
 
+    @requires_subinterpreters
+    def test_threads_join_with_no_main(self):
+        r_interp, w_interp = self.pipe()
+
+        INTERP = b'I'
+        FINI = b'F'
+        DONE = b'D'
+
+        interp = interpreters.create()
+        interp.run(f"""if True:
+            import os
+            import threading
+            import time
+
+            done = False
+
+            def notify_fini():
+                global done
+                done = True
+                os.write({w_interp}, {FINI!r})
+                t.join()
+            threading._register_atexit(notify_fini)
+
+            def task():
+                while not done:
+                    time.sleep(0.1)
+                os.write({w_interp}, {DONE!r})
+            t = threading.Thread(target=task)
+            t.start()
+
+            os.write({w_interp}, {INTERP!r})
+            """)
+        interp.close()
+
+        self.assertEqual(os.read(r_interp, 1), INTERP)
+        self.assertEqual(os.read(r_interp, 1), FINI)
+        self.assertEqual(os.read(r_interp, 1), DONE)
+
     @cpython_only
     def test_daemon_threads_fatal_error(self):
         subinterp_code = f"""if 1:
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 94fb6d933de114..9369560788719f 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -566,6 +566,55 @@ def test_string(self):
     OP         '='           (3, 0) (3, 1)
     OP         '}'           (3, 1) (3, 2)
     FSTRING_END "'''"         (3, 2) (3, 5)
+    """)
+        self.check_tokenize("""\
+f'''__{
+    x:a
+}__'''""", """\
+    FSTRING_START "f'''"        (1, 0) (1, 4)
+    FSTRING_MIDDLE '__'          (1, 4) (1, 6)
+    OP         '{'           (1, 6) (1, 7)
+    NL         '\\n'          (1, 7) (1, 8)
+    NAME       'x'           (2, 4) (2, 5)
+    OP         ':'           (2, 5) (2, 6)
+    FSTRING_MIDDLE 'a\\n'         (2, 6) (3, 0)
+    OP         '}'           (3, 0) (3, 1)
+    FSTRING_MIDDLE '__'          (3, 1) (3, 3)
+    FSTRING_END "'''"         (3, 3) (3, 6)
+    """)
+        self.check_tokenize("""\
+f'''__{
+    x:a
+    b
+     c
+      d
+}__'''""", """\
+    FSTRING_START "f'''"        (1, 0) (1, 4)
+    FSTRING_MIDDLE '__'          (1, 4) (1, 6)
+    OP         '{'           (1, 6) (1, 7)
+    NL         '\\n'          (1, 7) (1, 8)
+    NAME       'x'           (2, 4) (2, 5)
+    OP         ':'           (2, 5) (2, 6)
+    FSTRING_MIDDLE 'a\\n    b\\n     c\\n      d\\n' (2, 6) (6, 0)
+    OP         '}'           (6, 0) (6, 1)
+    FSTRING_MIDDLE '__'          (6, 1) (6, 3)
+    FSTRING_END "'''"         (6, 3) (6, 6)
+    """)
+        self.check_tokenize("""\
+f'__{
+    x:d
+}__'""", """\
+    FSTRING_START "f'"          (1, 0) (1, 2)
+    FSTRING_MIDDLE '__'          (1, 2) (1, 4)
+    OP         '{'           (1, 4) (1, 5)
+    NL         '\\n'          (1, 5) (1, 6)
+    NAME       'x'           (2, 4) (2, 5)
+    OP         ':'           (2, 5) (2, 6)
+    FSTRING_MIDDLE 'd'           (2, 6) (2, 7)
+    NL         '\\n'          (2, 7) (2, 8)
+    OP         '}'           (3, 0) (3, 1)
+    FSTRING_MIDDLE '__'          (3, 1) (3, 3)
+    FSTRING_END "'"           (3, 3) (3, 4)
     """)
 
     def test_function(self):
@@ -2277,6 +2326,54 @@ def test_string(self):
     FSTRING_START \'f"\'          (1, 0) (1, 2)
     FSTRING_MIDDLE 'hola\\\\\\\\\\\\r\\\\ndfgf' (1, 2) (1, 16)
     FSTRING_END \'"\'           (1, 16) (1, 17)
+    """)
+
+        self.check_tokenize("""\
+f'''__{
+    x:a
+}__'''""", """\
+    FSTRING_START "f'''"        (1, 0) (1, 4)
+    FSTRING_MIDDLE '__'          (1, 4) (1, 6)
+    LBRACE     '{'           (1, 6) (1, 7)
+    NAME       'x'           (2, 4) (2, 5)
+    COLON      ':'           (2, 5) (2, 6)
+    FSTRING_MIDDLE 'a\\n'         (2, 6) (3, 0)
+    RBRACE     '}'           (3, 0) (3, 1)
+    FSTRING_MIDDLE '__'          (3, 1) (3, 3)
+    FSTRING_END "'''"         (3, 3) (3, 6)
+    """)
+
+        self.check_tokenize("""\
+f'''__{
+    x:a
+    b
+     c
+      d
+}__'''""", """\
+    FSTRING_START "f'''"        (1, 0) (1, 4)
+    FSTRING_MIDDLE '__'          (1, 4) (1, 6)
+    LBRACE     '{'           (1, 6) (1, 7)
+    NAME       'x'           (2, 4) (2, 5)
+    COLON      ':'           (2, 5) (2, 6)
+    FSTRING_MIDDLE 'a\\n    b\\n     c\\n      d\\n' (2, 6) (6, 0)
+    RBRACE     '}'           (6, 0) (6, 1)
+    FSTRING_MIDDLE '__'          (6, 1) (6, 3)
+    FSTRING_END "'''"         (6, 3) (6, 6)
+    """)
+
+        self.check_tokenize("""\
+f'__{
+    x:d
+}__'""", """\
+    FSTRING_START "f'"          (1, 0) (1, 2)
+    FSTRING_MIDDLE '__'          (1, 2) (1, 4)
+    LBRACE     '{'           (1, 4) (1, 5)
+    NAME       'x'           (2, 4) (2, 5)
+    COLON      ':'           (2, 5) (2, 6)
+    FSTRING_MIDDLE 'd'           (2, 6) (2, 7)
+    RBRACE     '}'           (3, 0) (3, 1)
+    FSTRING_MIDDLE '__'          (3, 1) (3, 3)
+    FSTRING_END "'"           (3, 3) (3, 4)
     """)
 
     def test_function(self):
diff --git a/Lib/test/test_typing.py b/Lib/test/test_typing.py
index 9e891f113840be..c24cf3bc776fc1 100644
--- a/Lib/test/test_typing.py
+++ b/Lib/test/test_typing.py
@@ -185,7 +185,7 @@ def test_cannot_subclass(self):
             class A(self.bottom_type):
                 pass
         with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE):
-            class A(type(self.bottom_type)):
+            class B(type(self.bottom_type)):
                 pass
 
     def test_cannot_instantiate(self):
@@ -282,7 +282,7 @@ class C(type(Self)):
                 pass
         with self.assertRaisesRegex(TypeError,
                 r'Cannot subclass typing\.Self'):
-            class C(Self):
+            class D(Self):
                 pass
 
     def test_cannot_init(self):
@@ -339,7 +339,7 @@ class C(type(LiteralString)):
                 pass
         with self.assertRaisesRegex(TypeError,
                 r'Cannot subclass typing\.LiteralString'):
-            class C(LiteralString):
+            class D(LiteralString):
                 pass
 
     def test_cannot_init(self):
@@ -483,7 +483,7 @@ class V(TypeVar): pass
         T = TypeVar("T")
         with self.assertRaisesRegex(TypeError,
                 CANNOT_SUBCLASS_INSTANCE % 'TypeVar'):
-            class V(T): pass
+            class W(T): pass
 
     def test_cannot_instantiate_vars(self):
         with self.assertRaises(TypeError):
@@ -550,7 +550,7 @@ def test_many_weakrefs(self):
             with self.subTest(cls=cls):
                 vals = weakref.WeakValueDictionary()
 
-                for x in range(100000):
+                for x in range(10):
                     vals[x] = cls(str(x))
                 del vals
 
@@ -1244,20 +1244,20 @@ class C(TypeVarTuple): pass
         Ts = TypeVarTuple('Ts')
         with self.assertRaisesRegex(TypeError,
                 CANNOT_SUBCLASS_INSTANCE % 'TypeVarTuple'):
-            class C(Ts): pass
+            class D(Ts): pass
         with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE):
-            class C(type(Unpack)): pass
+            class E(type(Unpack)): pass
         with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE):
-            class C(type(*Ts)): pass
+            class F(type(*Ts)): pass
         with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE):
-            class C(type(Unpack[Ts])): pass
+            class G(type(Unpack[Ts])): pass
         with self.assertRaisesRegex(TypeError,
                                     r'Cannot subclass typing\.Unpack'):
-            class C(Unpack): pass
+            class H(Unpack): pass
         with self.assertRaisesRegex(TypeError, r'Cannot subclass typing.Unpack\[Ts\]'):
-            class C(*Ts): pass
+            class I(*Ts): pass
         with self.assertRaisesRegex(TypeError, r'Cannot subclass typing.Unpack\[Ts\]'):
-            class C(Unpack[Ts]): pass
+            class J(Unpack[Ts]): pass
 
     def test_variadic_class_args_are_correct(self):
         T = TypeVar('T')
@@ -1431,12 +1431,12 @@ def test_variadic_class_with_duplicate_typevartuples_fails(self):
         with self.assertRaises(TypeError):
             class C(Generic[*Ts1, *Ts1]): pass
         with self.assertRaises(TypeError):
-            class C(Generic[Unpack[Ts1], Unpack[Ts1]]): pass
+            class D(Generic[Unpack[Ts1], Unpack[Ts1]]): pass
 
         with self.assertRaises(TypeError):
-            class C(Generic[*Ts1, *Ts2, *Ts1]): pass
+            class E(Generic[*Ts1, *Ts2, *Ts1]): pass
         with self.assertRaises(TypeError):
-            class C(Generic[Unpack[Ts1], Unpack[Ts2], Unpack[Ts1]]): pass
+            class F(Generic[Unpack[Ts1], Unpack[Ts2], Unpack[Ts1]]): pass
 
     def test_type_concatenation_in_variadic_class_argument_list_succeeds(self):
         Ts = TypeVarTuple('Ts')
@@ -1804,11 +1804,11 @@ def test_cannot_subclass(self):
             class C(Union):
                 pass
         with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE):
-            class C(type(Union)):
+            class D(type(Union)):
                 pass
         with self.assertRaisesRegex(TypeError,
                 r'Cannot subclass typing\.Union\[int, str\]'):
-            class C(Union[int, str]):
+            class E(Union[int, str]):
                 pass
 
     def test_cannot_instantiate(self):
@@ -2557,10 +2557,10 @@ class BP(Protocol): pass
             class P(C, Protocol):
                 pass
         with self.assertRaises(TypeError):
-            class P(Protocol, C):
+            class Q(Protocol, C):
                 pass
         with self.assertRaises(TypeError):
-            class P(BP, C, Protocol):
+            class R(BP, C, Protocol):
                 pass
 
         class D(BP, C): pass
@@ -2836,7 +2836,7 @@ class NotAProtocolButAnImplicitSubclass3:
             meth: Callable[[], None]
             meth2: Callable[[int, str], bool]
             def meth(self): pass
-            def meth(self, x, y): return True
+            def meth2(self, x, y): return True
 
         self.assertNotIsSubclass(AnnotatedButNotAProtocol, CallableMembersProto)
         self.assertIsSubclass(NotAProtocolButAnImplicitSubclass, CallableMembersProto)
@@ -3658,11 +3658,11 @@ def test_protocols_bad_subscripts(self):
         with self.assertRaises(TypeError):
             class P(Protocol[T, T]): pass
         with self.assertRaises(TypeError):
-            class P(Protocol[int]): pass
+            class Q(Protocol[int]): pass
         with self.assertRaises(TypeError):
-            class P(Protocol[T], Protocol[S]): pass
+            class R(Protocol[T], Protocol[S]): pass
         with self.assertRaises(TypeError):
-            class P(typing.Mapping[T, S], Protocol[T]): pass
+            class S(typing.Mapping[T, S], Protocol[T]): pass
 
     def test_generic_protocols_repr(self):
         T = TypeVar('T')
@@ -4094,12 +4094,12 @@ class NewGeneric(Generic): ...
         with self.assertRaises(TypeError):
             class MyGeneric(Generic[T], Generic[S]): ...
         with self.assertRaises(TypeError):
-            class MyGeneric(List[T], Generic[S]): ...
+            class MyGeneric2(List[T], Generic[S]): ...
         with self.assertRaises(TypeError):
             Generic[()]
-        class C(Generic[T]): pass
+        class D(Generic[T]): pass
         with self.assertRaises(TypeError):
-            C[()]
+            D[()]
 
     def test_generic_subclass_checks(self):
         for typ in [list[int], List[int],
@@ -4836,7 +4836,7 @@ class Test(Generic[T], Final):
             class Subclass(Test):
                 pass
         with self.assertRaises(FinalException):
-            class Subclass(Test[int]):
+            class Subclass2(Test[int]):
                 pass
 
     def test_nested(self):
@@ -5074,15 +5074,15 @@ def test_cannot_subclass(self):
             class C(type(ClassVar)):
                 pass
         with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE):
-            class C(type(ClassVar[int])):
+            class D(type(ClassVar[int])):
                 pass
         with self.assertRaisesRegex(TypeError,
                                     r'Cannot subclass typing\.ClassVar'):
-            class C(ClassVar):
+            class E(ClassVar):
                 pass
         with self.assertRaisesRegex(TypeError,
                                     r'Cannot subclass typing\.ClassVar\[int\]'):
-            class C(ClassVar[int]):
+            class F(ClassVar[int]):
                 pass
 
     def test_cannot_init(self):
@@ -5124,15 +5124,15 @@ def test_cannot_subclass(self):
             class C(type(Final)):
                 pass
         with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE):
-            class C(type(Final[int])):
+            class D(type(Final[int])):
                 pass
         with self.assertRaisesRegex(TypeError,
                 r'Cannot subclass typing\.Final'):
-            class C(Final):
+            class E(Final):
                 pass
         with self.assertRaisesRegex(TypeError,
                 r'Cannot subclass typing\.Final\[int\]'):
-            class C(Final[int]):
+            class F(Final[int]):
                 pass
 
     def test_cannot_init(self):
@@ -7265,15 +7265,15 @@ class A:
             class X(NamedTuple, A):
                 x: int
         with self.assertRaises(TypeError):
-            class X(NamedTuple, tuple):
+            class Y(NamedTuple, tuple):
                 x: int
         with self.assertRaises(TypeError):
-            class X(NamedTuple, NamedTuple):
+            class Z(NamedTuple, NamedTuple):
                 x: int
-        class A(NamedTuple):
+        class B(NamedTuple):
             x: int
         with self.assertRaises(TypeError):
-            class X(NamedTuple, A):
+            class C(NamedTuple, B):
                 y: str
 
     def test_generic(self):
@@ -8037,15 +8037,15 @@ def test_cannot_subclass(self):
             class C(type(Required)):
                 pass
         with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE):
-            class C(type(Required[int])):
+            class D(type(Required[int])):
                 pass
         with self.assertRaisesRegex(TypeError,
                 r'Cannot subclass typing\.Required'):
-            class C(Required):
+            class E(Required):
                 pass
         with self.assertRaisesRegex(TypeError,
                 r'Cannot subclass typing\.Required\[int\]'):
-            class C(Required[int]):
+            class F(Required[int]):
                 pass
 
     def test_cannot_init(self):
@@ -8085,15 +8085,15 @@ def test_cannot_subclass(self):
             class C(type(NotRequired)):
                 pass
         with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE):
-            class C(type(NotRequired[int])):
+            class D(type(NotRequired[int])):
                 pass
         with self.assertRaisesRegex(TypeError,
                 r'Cannot subclass typing\.NotRequired'):
-            class C(NotRequired):
+            class E(NotRequired):
                 pass
         with self.assertRaisesRegex(TypeError,
                 r'Cannot subclass typing\.NotRequired\[int\]'):
-            class C(NotRequired[int]):
+            class F(NotRequired[int]):
                 pass
 
     def test_cannot_init(self):
@@ -8192,7 +8192,7 @@ class A(typing.Match):
             TypeError,
             r"type 're\.Pattern' is not an acceptable base type",
         ):
-            class A(typing.Pattern):
+            class B(typing.Pattern):
                 pass
 
 
@@ -8539,7 +8539,7 @@ class C(TypeAlias):
                 pass
 
         with self.assertRaises(TypeError):
-            class C(type(TypeAlias)):
+            class D(type(TypeAlias)):
                 pass
 
     def test_repr(self):
@@ -8929,19 +8929,19 @@ def test_cannot_subclass(self):
         with self.assertRaisesRegex(TypeError, NOT_A_BASE_TYPE % 'ParamSpec'):
             class C(ParamSpec): pass
         with self.assertRaisesRegex(TypeError, NOT_A_BASE_TYPE % 'ParamSpecArgs'):
-            class C(ParamSpecArgs): pass
+            class D(ParamSpecArgs): pass
         with self.assertRaisesRegex(TypeError, NOT_A_BASE_TYPE % 'ParamSpecKwargs'):
-            class C(ParamSpecKwargs): pass
+            class E(ParamSpecKwargs): pass
         P = ParamSpec('P')
         with self.assertRaisesRegex(TypeError,
                 CANNOT_SUBCLASS_INSTANCE % 'ParamSpec'):
-            class C(P): pass
+            class F(P): pass
         with self.assertRaisesRegex(TypeError,
                 CANNOT_SUBCLASS_INSTANCE % 'ParamSpecArgs'):
-            class C(P.args): pass
+            class G(P.args): pass
         with self.assertRaisesRegex(TypeError,
                 CANNOT_SUBCLASS_INSTANCE % 'ParamSpecKwargs'):
-            class C(P.kwargs): pass
+            class H(P.kwargs): pass
 
 
 class ConcatenateTests(BaseTestCase):
@@ -9022,15 +9022,15 @@ def test_cannot_subclass(self):
             class C(type(TypeGuard)):
                 pass
         with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE):
-            class C(type(TypeGuard[int])):
+            class D(type(TypeGuard[int])):
                 pass
         with self.assertRaisesRegex(TypeError,
                                     r'Cannot subclass typing\.TypeGuard'):
-            class C(TypeGuard):
+            class E(TypeGuard):
                 pass
         with self.assertRaisesRegex(TypeError,
                                     r'Cannot subclass typing\.TypeGuard\[int\]'):
-            class C(TypeGuard[int]):
+            class F(TypeGuard[int]):
                 pass
 
     def test_cannot_init(self):
diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py
index bdf7b0588bee67..6f698a8d891815 100644
--- a/Lib/test/test_unparse.py
+++ b/Lib/test/test_unparse.py
@@ -730,7 +730,8 @@ class DirectoryTestCase(ASTTestCase):
     test_directories = (lib_dir, lib_dir / "test")
     run_always_files = {"test_grammar.py", "test_syntax.py", "test_compile.py",
                         "test_ast.py", "test_asdl_parser.py", "test_fstring.py",
-                        "test_patma.py", "test_type_alias.py", "test_type_params.py"}
+                        "test_patma.py", "test_type_alias.py", "test_type_params.py",
+                        "test_tokenize.py"}
 
     _files_to_test = None
 
diff --git a/Lib/test/test_venv.py b/Lib/test/test_venv.py
index 0ffe3e1d0cc498..890672c5d27eec 100644
--- a/Lib/test/test_venv.py
+++ b/Lib/test/test_venv.py
@@ -569,7 +569,11 @@ def test_zippath_from_non_installed_posix(self):
                         eachpath,
                         os.path.join(non_installed_dir, platlibdir))
             elif os.path.isfile(os.path.join(eachpath, "os.py")):
-                for name in os.listdir(eachpath):
+                names = os.listdir(eachpath)
+                ignored_names = copy_python_src_ignore(eachpath, names)
+                for name in names:
+                    if name in ignored_names:
+                        continue
                     if name == "site-packages":
                         continue
                     fn = os.path.join(eachpath, name)
diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py
index 9a099adc74f4b4..1dc8b91a453f92 100644
--- a/Lib/test/test_zlib.py
+++ b/Lib/test/test_zlib.py
@@ -512,18 +512,7 @@ def test_odd_flush(self):
 
         # Try 17K of data
         # generate random data stream
-        try:
-            # In 2.3 and later, WichmannHill is the RNG of the bug report
-            gen = random.WichmannHill()
-        except AttributeError:
-            try:
-                # 2.2 called it Random
-                gen = random.Random()
-            except AttributeError:
-                # others might simply have a single RNG
-                gen = random
-        gen.seed(1)
-        data = gen.randbytes(17 * 1024)
+        data = random.randbytes(17 * 1024)
 
         # compress, sync-flush, and decompress
         first = co.compress(data)
diff --git a/Lib/threading.py b/Lib/threading.py
index 31cefd2143a8c4..41c3a9ff93856f 100644
--- a/Lib/threading.py
+++ b/Lib/threading.py
@@ -38,6 +38,7 @@
 _allocate_lock = _thread.allocate_lock
 _set_sentinel = _thread._set_sentinel
 get_ident = _thread.get_ident
+_is_main_interpreter = _thread._is_main_interpreter
 try:
     get_native_id = _thread.get_native_id
     _HAVE_THREAD_NATIVE_ID = True
@@ -1574,7 +1575,7 @@ def _shutdown():
     # the main thread's tstate_lock - that won't happen until the interpreter
     # is nearly dead.  So we release it here.  Note that just calling _stop()
     # isn't enough:  other threads may already be waiting on _tstate_lock.
-    if _main_thread._is_stopped:
+    if _main_thread._is_stopped and _is_main_interpreter():
         # _shutdown() was already called
         return
 
@@ -1592,8 +1593,11 @@ def _shutdown():
         # The main thread isn't finished yet, so its thread state lock can't
         # have been released.
         assert tlock is not None
-        assert tlock.locked()
-        tlock.release()
+        if tlock.locked():
+            # It should have been released already by
+            # _PyInterpreterState_SetNotRunningMain(), but there may be
+            # embedders that aren't calling that yet.
+            tlock.release()
         _main_thread._stop()
     else:
         # bpo-1596321: _shutdown() must be called in the main thread.
@@ -1627,6 +1631,7 @@ def main_thread():
     In normal conditions, the main thread is the thread from which the
     Python interpreter was started.
     """
+    # XXX Figure this out for subinterpreters.  (See gh-75698.)
     return _main_thread
 
 # get thread-local implementation, either from the thread
diff --git a/Lib/typing.py b/Lib/typing.py
index 639be75747dae0..d1f371377b88f8 100644
--- a/Lib/typing.py
+++ b/Lib/typing.py
@@ -937,13 +937,6 @@ def _is_typevar_like(x: Any) -> bool:
     return isinstance(x, (TypeVar, ParamSpec)) or _is_unpacked_typevartuple(x)
 
 
-class _PickleUsingNameMixin:
-    """Mixin enabling pickling based on self.__name__."""
-
-    def __reduce__(self):
-        return self.__name__
-
-
 def _typevar_subst(self, arg):
     msg = "Parameters to generic types must be types."
     arg = _type_check(arg, msg, is_argument=True)
diff --git a/Makefile.pre.in b/Makefile.pre.in
index fa5b9e6654c26c..97eb767b8fcdeb 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -657,7 +657,7 @@ profile-run-stamp:
 	$(MAKE) profile-gen-stamp
 	# Next, run the profile task to generate the profile information.
 	@ # FIXME: can't run for a cross build
-	$(LLVM_PROF_FILE) $(RUNSHARED) ./$(BUILDPYTHON) $(PROFILE_TASK) || true
+	$(LLVM_PROF_FILE) $(RUNSHARED) ./$(BUILDPYTHON) $(PROFILE_TASK)
 	$(LLVM_PROF_MERGER)
 	# Remove profile generation binary since we are done with it.
 	$(MAKE) clean-retain-profile
@@ -706,7 +706,7 @@ profile-bolt-stamp: $(BUILDPYTHON)
 	  mv "$${bin}.bolt_inst" "$${bin}"; \
 	done
 	# Run instrumented binaries to collect data.
-	$(RUNSHARED) ./$(BUILDPYTHON) $(PROFILE_TASK) || true
+	$(RUNSHARED) ./$(BUILDPYTHON) $(PROFILE_TASK)
 	# Merge all the data files together.
 	for bin in $(BOLT_BINARIES); do \
 	  @MERGE_FDATA@ $${bin}.*.fdata > "$${bin}.fdata"; \
@@ -1837,7 +1837,7 @@ $(LIBRARY_OBJS) $(MODOBJS) Programs/python.o: $(PYTHON_HEADERS)
 
 TESTOPTS=	$(EXTRATESTOPTS)
 TESTPYTHON=	$(RUNSHARED) $(PYTHON_FOR_BUILD) $(TESTPYTHONOPTS)
-TESTRUNNER=	$(TESTPYTHON) $(srcdir)/Tools/scripts/run_tests.py
+TESTRUNNER=	$(TESTPYTHON) -m test
 TESTTIMEOUT=
 
 # Remove "test_python_*" directories of previous failed test jobs.
@@ -1875,11 +1875,6 @@ buildbottest: all
 	fi
 	$(TESTRUNNER) --slow-ci --timeout=$(TESTTIMEOUT) $(TESTOPTS)
 
-# Like buildbottest, but run Python tests with HOSTRUNNER directly.
-.PHONY: hostrunnertest
-hostrunnertest: all
-	$(RUNSHARED) $(HOSTRUNNER) ./$(BUILDPYTHON) -m test --slow-ci --timeout=$(TESTTIMEOUT) $(TESTOPTS)
-
 .PHONY: pythoninfo
 pythoninfo: all
 		$(RUNSHARED) $(HOSTRUNNER) ./$(BUILDPYTHON) -m test.pythoninfo
diff --git a/Misc/ACKS b/Misc/ACKS
index ccdfae66832f0e..94cb1965676f48 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1373,6 +1373,7 @@ Peter Parente
 Alexandre Parenteau
 Dan Parisien
 HyeSoo Park
+Moonsik Park
 William Park
 Claude Paroz
 Heikki Partanen
diff --git a/Misc/NEWS.d/next/Build/2023-10-03-17-55-09.gh-issue-110276.luaKRg.rst b/Misc/NEWS.d/next/Build/2023-10-03-17-55-09.gh-issue-110276.luaKRg.rst
new file mode 100644
index 00000000000000..392203d21ca45d
--- /dev/null
+++ b/Misc/NEWS.d/next/Build/2023-10-03-17-55-09.gh-issue-110276.luaKRg.rst
@@ -0,0 +1,2 @@
+No longer ignore :envvar:`PROFILE_TASK` failure silently: command used by
+Profile Guided Optimization (PGO). Patch by Victor Stinner.
diff --git a/Misc/NEWS.d/next/C API/2023-09-01-15-35-05.gh-issue-107073.zCz0iN.rst b/Misc/NEWS.d/next/C API/2023-09-01-15-35-05.gh-issue-107073.zCz0iN.rst
new file mode 100644
index 00000000000000..866809091aa5da
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2023-09-01-15-35-05.gh-issue-107073.zCz0iN.rst	
@@ -0,0 +1,3 @@
+Add :c:func:`PyObject_VisitManagedDict` and :c:func:`PyObject_ClearManagedDict`
+functions which must be called by the traverse and clear functions of a type
+using :c:macro:`Py_TPFLAGS_MANAGED_DICT` flag. Patch by Victor Stinner.
diff --git a/Misc/NEWS.d/next/C API/2023-09-04-11-47-12.gh-issue-108867.Cr_LKd.rst b/Misc/NEWS.d/next/C API/2023-09-04-11-47-12.gh-issue-108867.Cr_LKd.rst
new file mode 100644
index 00000000000000..2f56466833f6dd
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2023-09-04-11-47-12.gh-issue-108867.Cr_LKd.rst	
@@ -0,0 +1,5 @@
+Add :c:func:`PyThreadState_GetUnchecked()` function: similar to
+:c:func:`PyThreadState_Get()`, but it doesn't kill the process with a fatal
+error if the thread state is NULL. The caller is responsible for checking
+whether the result is NULL. Previously, the function was private and known
+as ``_PyThreadState_UncheckedGet()``. Patch by Victor Stinner.
diff --git a/Misc/NEWS.d/next/C API/2023-10-02-13-39-57.gh-issue-110014.gfQ4jU.rst b/Misc/NEWS.d/next/C API/2023-10-02-13-39-57.gh-issue-110014.gfQ4jU.rst
new file mode 100644
index 00000000000000..3a5ff7d43bbc01
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2023-10-02-13-39-57.gh-issue-110014.gfQ4jU.rst	
@@ -0,0 +1,2 @@
+Remove undocumented ``PY_TIMEOUT_MAX`` constant from the limited C API.
+Patch by Victor Stinner.
diff --git a/Misc/NEWS.d/next/C API/2023-10-03-06-19-10.gh-issue-110235.uec5AG.rst b/Misc/NEWS.d/next/C API/2023-10-03-06-19-10.gh-issue-110235.uec5AG.rst
new file mode 100644
index 00000000000000..ff26f25fe71d61
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2023-10-03-06-19-10.gh-issue-110235.uec5AG.rst	
@@ -0,0 +1,2 @@
+Raise :exc:`TypeError` for duplicate/unknown fields in ``PyStructSequence`` constructor.
+Patched by Xuehai Pan.
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-09-26-03-46-55.gh-issue-109369.OJbxbF.rst b/Misc/NEWS.d/next/Core and Builtins/2023-09-26-03-46-55.gh-issue-109369.OJbxbF.rst
new file mode 100644
index 00000000000000..ca1f0f1bd44a8c
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-09-26-03-46-55.gh-issue-109369.OJbxbF.rst	
@@ -0,0 +1,2 @@
+The internal eval_breaker and supporting flags, plus the monitoring version
+have been merged into a single atomic integer to speed up checks.
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-09-26-14-00-25.gh-issue-105716.SUJkW1.rst b/Misc/NEWS.d/next/Core and Builtins/2023-09-26-14-00-25.gh-issue-105716.SUJkW1.rst
new file mode 100644
index 00000000000000..b35550fa650dcc
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-09-26-14-00-25.gh-issue-105716.SUJkW1.rst	
@@ -0,0 +1,3 @@
+Subinterpreters now correctly handle the case where they have threads
+running in the background.  Before, such threads would interfere with
+cleaning up and destroying them, as well as prevent running another script.
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-09-27-18-01-06.gh-issue-109853.coQQiL.rst b/Misc/NEWS.d/next/Core and Builtins/2023-09-27-18-01-06.gh-issue-109853.coQQiL.rst
new file mode 100644
index 00000000000000..45de3ba8877b01
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-09-27-18-01-06.gh-issue-109853.coQQiL.rst	
@@ -0,0 +1 @@
+``sys.path[0]`` is now set correctly for subinterpreters.
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-10-01-02-58-00.gh-issue-110164.z7TMCq.rst b/Misc/NEWS.d/next/Core and Builtins/2023-10-01-02-58-00.gh-issue-110164.z7TMCq.rst
new file mode 100644
index 00000000000000..086d70f30e204f
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-10-01-02-58-00.gh-issue-110164.z7TMCq.rst	
@@ -0,0 +1,2 @@
+regrtest: If the ``SOURCE_DATE_EPOCH`` environment variable is defined,
+regrtest now disables test randomization. Patch by Victor Stinner.
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst b/Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst
new file mode 100644
index 00000000000000..55c743d0e4917e
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst	
@@ -0,0 +1,3 @@
+Correctly identify the format spec in f-strings (with single or triple
+quotes) that have multiple lines in the expression part and include a
+formatting spec. Patch by Pablo Galindo.
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-10-03-23-26-18.gh-issue-110309.Y8nDOF.rst b/Misc/NEWS.d/next/Core and Builtins/2023-10-03-23-26-18.gh-issue-110309.Y8nDOF.rst
new file mode 100644
index 00000000000000..830428730391df
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-10-03-23-26-18.gh-issue-110309.Y8nDOF.rst	
@@ -0,0 +1 @@
+Remove unnecessary empty constant nodes in the ast of f-string specs.
diff --git a/Misc/NEWS.d/next/Library/2023-08-04-19-00-53.gh-issue-107465.Vc1Il3.rst b/Misc/NEWS.d/next/Library/2023-08-04-19-00-53.gh-issue-107465.Vc1Il3.rst
new file mode 100644
index 00000000000000..e98092f546e393
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-08-04-19-00-53.gh-issue-107465.Vc1Il3.rst
@@ -0,0 +1 @@
+Add :meth:`pathlib.Path.from_uri` classmethod.
diff --git a/Misc/NEWS.d/next/Library/2023-09-08-19-44-01.gh-issue-109151.GkzkQu.rst b/Misc/NEWS.d/next/Library/2023-09-08-19-44-01.gh-issue-109151.GkzkQu.rst
new file mode 100644
index 00000000000000..78b4e882baba96
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-09-08-19-44-01.gh-issue-109151.GkzkQu.rst
@@ -0,0 +1 @@
+Enable ``readline`` editing features in the :ref:`sqlite3 command-line interface <sqlite3-cli>` (``python -m sqlite3``).
diff --git a/Misc/NEWS.d/next/Library/2023-09-21-16-21-19.gh-issue-109649.YYCjAF.rst b/Misc/NEWS.d/next/Library/2023-09-21-16-21-19.gh-issue-109649.YYCjAF.rst
new file mode 100644
index 00000000000000..ab708e6fb9a7d9
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-09-21-16-21-19.gh-issue-109649.YYCjAF.rst
@@ -0,0 +1,2 @@
+Add :func:`os.process_cpu_count` function to get the number of logical CPUs
+usable by the calling thread of the current process. Patch by Victor Stinner.
diff --git a/Misc/NEWS.d/next/Library/2023-09-24-06-04-14.gh-issue-109590.9EMofC.rst b/Misc/NEWS.d/next/Library/2023-09-24-06-04-14.gh-issue-109590.9EMofC.rst
new file mode 100644
index 00000000000000..647e84e71b42d2
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-09-24-06-04-14.gh-issue-109590.9EMofC.rst
@@ -0,0 +1,3 @@
+:func:`shutil.which` will prefer files with an extension in ``PATHEXT`` if the given mode includes ``os.X_OK`` on win32.
+If no ``PATHEXT`` match is found, a file without an extension in ``PATHEXT`` can be returned.
+This change will have :func:`shutil.which` act more similarly to previous behavior in Python 3.11.
diff --git a/Misc/NEWS.d/next/Library/2023-09-28-12-32-57.gh-issue-88402.hoa3Gx.rst b/Misc/NEWS.d/next/Library/2023-09-28-12-32-57.gh-issue-88402.hoa3Gx.rst
new file mode 100644
index 00000000000000..80ec65081c0dc8
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-09-28-12-32-57.gh-issue-88402.hoa3Gx.rst
@@ -0,0 +1,2 @@
+Add new variables to :py:meth:`sysconfig.get_config_vars` on Windows:
+``LIBRARY``, ``LDLIBRARY``, ``LIBDIR``, ``SOABI``, and ``Py_NOGIL``.
diff --git a/Misc/NEWS.d/next/Library/2023-09-30-12-50-47.gh-issue-110150.9j0Ij5.rst b/Misc/NEWS.d/next/Library/2023-09-30-12-50-47.gh-issue-110150.9j0Ij5.rst
new file mode 100644
index 00000000000000..3c4dde59f71a93
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-09-30-12-50-47.gh-issue-110150.9j0Ij5.rst
@@ -0,0 +1,2 @@
+Fix base case handling in statistics.quantiles.  Now allows a single data
+point.
diff --git a/Misc/NEWS.d/next/Library/2023-10-01-01-47-21.gh-issue-109649.BizOaD.rst b/Misc/NEWS.d/next/Library/2023-10-01-01-47-21.gh-issue-109649.BizOaD.rst
new file mode 100644
index 00000000000000..888fd79962b412
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-10-01-01-47-21.gh-issue-109649.BizOaD.rst
@@ -0,0 +1,4 @@
+:mod:`multiprocessing`, :mod:`concurrent.futures`, :mod:`compileall`:
+Replace :func:`os.cpu_count` with :func:`os.process_cpu_count` to select the
+default number of worker threads and processes. Get the CPU affinity if
+supported. Patch by Victor Stinner.
diff --git a/Misc/NEWS.d/next/Library/2023-10-02-15-07-28.gh-issue-110222.zl_oHh.rst b/Misc/NEWS.d/next/Library/2023-10-02-15-07-28.gh-issue-110222.zl_oHh.rst
new file mode 100644
index 00000000000000..fd2ecdf6269cf3
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-10-02-15-07-28.gh-issue-110222.zl_oHh.rst
@@ -0,0 +1,2 @@
+Add support of struct sequence objects in :func:`copy.replace`.
+Patched by Xuehai Pan.
diff --git a/Misc/NEWS.d/next/Library/2023-10-02-15-40-10.gh-issue-109653.iB0peK.rst b/Misc/NEWS.d/next/Library/2023-10-02-15-40-10.gh-issue-109653.iB0peK.rst
new file mode 100644
index 00000000000000..54330976d71dc1
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-10-02-15-40-10.gh-issue-109653.iB0peK.rst
@@ -0,0 +1,2 @@
+Fix a Python 3.12 regression in the import time of :mod:`random`. Patch by Alex
+Waygood.
diff --git a/Misc/NEWS.d/next/Library/2023-10-03-00-04-26.gh-issue-110249.K0mMrs.rst b/Misc/NEWS.d/next/Library/2023-10-03-00-04-26.gh-issue-110249.K0mMrs.rst
new file mode 100644
index 00000000000000..a7c9c0f60feea2
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-10-03-00-04-26.gh-issue-110249.K0mMrs.rst
@@ -0,0 +1 @@
+Add ``--inline-caches`` flag to ``dis`` command line.
diff --git a/Misc/NEWS.d/next/Library/2023-10-03-14-07-05.gh-issue-110273.QaDUmS.rst b/Misc/NEWS.d/next/Library/2023-10-03-14-07-05.gh-issue-110273.QaDUmS.rst
new file mode 100644
index 00000000000000..98d87da6295ee5
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-10-03-14-07-05.gh-issue-110273.QaDUmS.rst
@@ -0,0 +1,3 @@
+:func:`dataclasses.replace` now raises TypeError instead of ValueError if a
+keyword argument is specified for a field declared with init=False or if a
+keyword argument is missing for a required InitVar field.
diff --git a/Misc/NEWS.d/next/Library/2023-10-03-15-17-03.gh-issue-109653.9DYOMD.rst b/Misc/NEWS.d/next/Library/2023-10-03-15-17-03.gh-issue-109653.9DYOMD.rst
new file mode 100644
index 00000000000000..92e5a1cada9a69
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-10-03-15-17-03.gh-issue-109653.9DYOMD.rst
@@ -0,0 +1,3 @@
+Slightly improve the import time of several standard-library modules by
+deferring imports of :mod:`warnings` within those modules. Patch by Alex
+Waygood.
diff --git a/Misc/NEWS.d/next/Library/2023-10-04-18-56-29.gh-issue-110365.LCxiau.rst b/Misc/NEWS.d/next/Library/2023-10-04-18-56-29.gh-issue-110365.LCxiau.rst
new file mode 100644
index 00000000000000..a1ac39b60296a3
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-10-04-18-56-29.gh-issue-110365.LCxiau.rst
@@ -0,0 +1,2 @@
+Fix :func:`termios.tcsetattr` bug that was overwriting existing errors
+while parsing integers from the ``term`` list.
diff --git a/Misc/NEWS.d/next/Tests/2023-09-30-20-18-38.gh-issue-110152.4Kxve1.rst b/Misc/NEWS.d/next/Tests/2023-09-30-20-18-38.gh-issue-110152.4Kxve1.rst
new file mode 100644
index 00000000000000..2fb6cbbad0c449
--- /dev/null
+++ b/Misc/NEWS.d/next/Tests/2023-09-30-20-18-38.gh-issue-110152.4Kxve1.rst
@@ -0,0 +1,5 @@
+Remove ``Tools/scripts/run_tests.py`` and ``make hostrunnertest``. Just run
+``./python -m test --slow-ci``, ``make buildbottest`` or ``make test`` instead.
+Python test runner (regrtest) now handles cross-compilation and HOSTRUNNER. It
+also adds options to Python such as ``-u -E -W default -bb`` when
+``--fast-ci`` or ``--slow-ci`` option is used. Patch by Victor Stinner.
diff --git a/Misc/NEWS.d/next/Tests/2023-10-01-10-27-02.gh-issue-110171.ZPlo0h.rst b/Misc/NEWS.d/next/Tests/2023-10-01-10-27-02.gh-issue-110171.ZPlo0h.rst
new file mode 100644
index 00000000000000..9b41b033bc7f2b
--- /dev/null
+++ b/Misc/NEWS.d/next/Tests/2023-10-01-10-27-02.gh-issue-110171.ZPlo0h.rst
@@ -0,0 +1,3 @@
+``libregrtest`` now always sets and shows ``random.seed``,
+so tests are more reproducible. Use ``--randseed`` flag
+to pass the explicit random seed for tests.
diff --git a/Misc/NEWS.d/next/Tests/2023-10-03-10-54-09.gh-issue-110267.O-c47G.rst b/Misc/NEWS.d/next/Tests/2023-10-03-10-54-09.gh-issue-110267.O-c47G.rst
new file mode 100644
index 00000000000000..2bae7715cc3d5b
--- /dev/null
+++ b/Misc/NEWS.d/next/Tests/2023-10-03-10-54-09.gh-issue-110267.O-c47G.rst
@@ -0,0 +1,2 @@
+Add tests for pickling and copying PyStructSequence objects.
+Patched by Xuehai Pan.
diff --git a/Misc/NEWS.d/next/Tests/2023-10-04-18-27-47.gh-issue-110367.Nnq1I7.rst b/Misc/NEWS.d/next/Tests/2023-10-04-18-27-47.gh-issue-110367.Nnq1I7.rst
new file mode 100644
index 00000000000000..a1a6a09da509ef
--- /dev/null
+++ b/Misc/NEWS.d/next/Tests/2023-10-04-18-27-47.gh-issue-110367.Nnq1I7.rst
@@ -0,0 +1,4 @@
+regrtest: When using worker processes (-jN) with --verbose3 option, regrtest
+can now display the worker output even if a worker process does crash.
+Previously, sys.stdout and sys.stderr were replaced and so the worker output
+was lost on a crash. Patch by Victor Stinner.
diff --git a/Misc/NEWS.d/next/Windows/2023-08-18-00-01-21.gh-issue-83180.DdLffv.rst b/Misc/NEWS.d/next/Windows/2023-08-18-00-01-21.gh-issue-83180.DdLffv.rst
new file mode 100644
index 00000000000000..1e59765a7674b1
--- /dev/null
+++ b/Misc/NEWS.d/next/Windows/2023-08-18-00-01-21.gh-issue-83180.DdLffv.rst
@@ -0,0 +1,3 @@
+Changes the :ref:`launcher` to prefer an active virtual environment when the
+launched script has a shebang line using a Unix-like virtual command, even
+if the command requests a specific version of Python.
diff --git a/Misc/NEWS.d/next/Windows/2023-10-03-12-30-59.gh-issue-82367.nxwfMx.rst b/Misc/NEWS.d/next/Windows/2023-10-03-12-30-59.gh-issue-82367.nxwfMx.rst
new file mode 100644
index 00000000000000..613ca075044b51
--- /dev/null
+++ b/Misc/NEWS.d/next/Windows/2023-10-03-12-30-59.gh-issue-82367.nxwfMx.rst
@@ -0,0 +1,2 @@
+:func:`os.path.realpath` now resolves MS-DOS style file names even if
+the file is not accessible. Patch by Moonsik Park.
diff --git a/Misc/requirements-test.txt b/Misc/requirements-test.txt
deleted file mode 100644
index 60e7ed20a3d510..00000000000000
--- a/Misc/requirements-test.txt
+++ /dev/null
@@ -1 +0,0 @@
-tzdata==2020.3
diff --git a/Modules/Setup b/Modules/Setup
index 8676f9ddce4841..1367f0ef4fa54a 100644
--- a/Modules/Setup
+++ b/Modules/Setup
@@ -155,6 +155,7 @@ PYTHONPATH=$(COREPYTHONPATH)
 #math mathmodule.c
 #mmap mmapmodule.c
 #select selectmodule.c
+#_sysconfig _sysconfig.c
 
 # XML
 #_elementtree _elementtree.c
diff --git a/Modules/Setup.bootstrap.in b/Modules/Setup.bootstrap.in
index 8ef0f203a82a8e..cd12c1bd0df8f9 100644
--- a/Modules/Setup.bootstrap.in
+++ b/Modules/Setup.bootstrap.in
@@ -19,6 +19,7 @@ errno errnomodule.c
 _io _io/_iomodule.c _io/iobase.c _io/fileio.c _io/bytesio.c _io/bufferedio.c _io/textio.c _io/stringio.c
 itertools itertoolsmodule.c
 _sre _sre/sre.c
+_sysconfig _sysconfig.c
 _thread _threadmodule.c
 time timemodule.c
 _typing _typingmodule.c
diff --git a/Modules/_asynciomodule.c b/Modules/_asynciomodule.c
index c66a8623413f4b..e911286660b56e 100644
--- a/Modules/_asynciomodule.c
+++ b/Modules/_asynciomodule.c
@@ -816,7 +816,7 @@ FutureObj_clear(FutureObj *fut)
     Py_CLEAR(fut->fut_source_tb);
     Py_CLEAR(fut->fut_cancel_msg);
     Py_CLEAR(fut->fut_cancelled_exc);
-    _PyObject_ClearManagedDict((PyObject *)fut);
+    PyObject_ClearManagedDict((PyObject *)fut);
     return 0;
 }
 
@@ -834,7 +834,7 @@ FutureObj_traverse(FutureObj *fut, visitproc visit, void *arg)
     Py_VISIT(fut->fut_source_tb);
     Py_VISIT(fut->fut_cancel_msg);
     Py_VISIT(fut->fut_cancelled_exc);
-    _PyObject_VisitManagedDict((PyObject *)fut, visit, arg);
+    PyObject_VisitManagedDict((PyObject *)fut, visit, arg);
     return 0;
 }
 
@@ -2181,7 +2181,7 @@ TaskObj_traverse(TaskObj *task, visitproc visit, void *arg)
     Py_VISIT(fut->fut_source_tb);
     Py_VISIT(fut->fut_cancel_msg);
     Py_VISIT(fut->fut_cancelled_exc);
-    _PyObject_VisitManagedDict((PyObject *)fut, visit, arg);
+    PyObject_VisitManagedDict((PyObject *)fut, visit, arg);
     return 0;
 }
 
diff --git a/Modules/_decimal/tests/deccheck.py b/Modules/_decimal/tests/deccheck.py
index edf753f3704a18..bf277dd6879ffe 100644
--- a/Modules/_decimal/tests/deccheck.py
+++ b/Modules/_decimal/tests/deccheck.py
@@ -1301,7 +1301,7 @@ def tfunc():
                 out, _ = p.communicate()
                 write_output(out, p.returncode)
 
-        N = os.cpu_count()
+        N = os.process_cpu_count()
         t = N * [None]
 
         for i in range(N):
diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c
index fb416700e22523..8a73ea0365b7a3 100644
--- a/Modules/_io/fileio.c
+++ b/Modules/_io/fileio.c
@@ -5,20 +5,23 @@
 #include "pycore_object.h"        // _PyObject_GC_UNTRACK()
 #include "pycore_pyerrors.h"      // _PyErr_ChainExceptions1()
 
-#include <stdbool.h>
+#include <stdbool.h>              // bool
+#ifdef HAVE_UNISTD_H
+#  include <unistd.h>             // lseek()
+#endif
 #ifdef HAVE_SYS_TYPES_H
-#include <sys/types.h>
+#  include <sys/types.h>
 #endif
 #ifdef HAVE_SYS_STAT_H
-#include <sys/stat.h>
+#  include <sys/stat.h>
 #endif
 #ifdef HAVE_IO_H
-#include <io.h>
+#  include <io.h>
 #endif
 #ifdef HAVE_FCNTL_H
-#include <fcntl.h>
+#  include <fcntl.h>              // open()
 #endif
-#include <stddef.h> /* For offsetof */
+
 #include "_iomodule.h"
 
 /*
@@ -35,22 +38,23 @@
  */
 
 #ifdef MS_WINDOWS
-/* can simulate truncate with Win32 API functions; see file_truncate */
-#define HAVE_FTRUNCATE
-#ifndef WIN32_LEAN_AND_MEAN
-#define WIN32_LEAN_AND_MEAN
-#endif
-#include <windows.h>
+   // can simulate truncate with Win32 API functions; see file_truncate
+#  define HAVE_FTRUNCATE
+#  ifndef WIN32_LEAN_AND_MEAN
+#    define WIN32_LEAN_AND_MEAN
+#  endif
+#  include <windows.h>
 #endif
 
 #if BUFSIZ < (8*1024)
-#define SMALLCHUNK (8*1024)
+#  define SMALLCHUNK (8*1024)
 #elif (BUFSIZ >= (2 << 25))
-#error "unreasonable BUFSIZ > 64 MiB defined"
+#  error "unreasonable BUFSIZ > 64 MiB defined"
 #else
-#define SMALLCHUNK BUFSIZ
+#  define SMALLCHUNK BUFSIZ
 #endif
 
+
 /*[clinic input]
 module _io
 class _io.FileIO "fileio *" "clinic_state()->PyFileIO_Type"
diff --git a/Modules/_randommodule.c b/Modules/_randommodule.c
index 18811d03adb451..d41093c8806476 100644
--- a/Modules/_randommodule.c
+++ b/Modules/_randommodule.c
@@ -74,12 +74,15 @@
 #include "pycore_long.h"          // _PyLong_AsByteArray()
 #include "pycore_moduleobject.h"  // _PyModule_GetState()
 #include "pycore_pylifecycle.h"   // _PyOS_URandomNonblock()
+
+#ifdef HAVE_UNISTD_H
+#  include <unistd.h>             // getpid()
+#endif
 #ifdef HAVE_PROCESS_H
 #  include <process.h>            // getpid()
 #endif
-
 #ifdef MS_WINDOWS
-#  include <windows.h>
+#  include <windows.h>            // GetCurrentProcessId()
 #endif
 
 /* Period parameters -- These are all magic.  Don't change. */
diff --git a/Modules/_sysconfig.c b/Modules/_sysconfig.c
new file mode 100644
index 00000000000000..6f1cc16b58467d
--- /dev/null
+++ b/Modules/_sysconfig.c
@@ -0,0 +1,98 @@
+// _sysconfig provides data for the Python sysconfig module
+
+#ifndef Py_BUILD_CORE_BUILTIN
+#  define Py_BUILD_CORE_MODULE 1
+#endif
+
+#include "Python.h"
+
+#include "pycore_importdl.h"   // _PyImport_DynLoadFiletab
+#include "pycore_long.h"       // _PyLong_GetZero, _PyLong_GetOne
+
+
+/*[clinic input]
+module _sysconfig
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=0a7c02d3e212ac97]*/
+
+#include "clinic/_sysconfig.c.h"
+
+#ifdef MS_WINDOWS
+static int  // Set dict[key] to a str built from str_value; 0 or -1.
+add_string_value(PyObject *dict, const char *key, const char *str_value)
+{
+    PyObject *value = PyUnicode_FromString(str_value);
+    if (value == NULL) {
+        return -1;
+    }
+    int err = PyDict_SetItemString(dict, key, value);
+    Py_DECREF(value);  // release the local reference to the new string
+    return err;  // result of PyDict_SetItemString()
+}
+#endif
+
+/*[clinic input]
+_sysconfig.config_vars
+
+Returns a dictionary containing build variables intended to be exposed by sysconfig.
+[clinic start generated code]*/
+
+static PyObject *
+_sysconfig_config_vars_impl(PyObject *module)
+/*[clinic end generated code: output=9c41cdee63ea9487 input=391ff42f3af57d01]*/
+{
+    PyObject *config = PyDict_New();  // result dict; filled in below
+    if (config == NULL) {
+        return NULL;
+    }
+
+#ifdef MS_WINDOWS
+    if (add_string_value(config, "EXT_SUFFIX", PYD_TAGGED_SUFFIX) < 0) {
+        Py_DECREF(config);  // discard the partially filled dict
+        return NULL;
+    }
+    if (add_string_value(config, "SOABI", PYD_SOABI) < 0) {
+        Py_DECREF(config);
+        return NULL;
+    }
+#endif  // MS_WINDOWS
+
+#ifdef Py_NOGIL
+    PyObject *py_nogil = _PyLong_GetOne();  // Py_NOGIL defined: store 1
+#else
+    PyObject *py_nogil = _PyLong_GetZero();  // otherwise store 0
+#endif  // Py_NOGIL
+    if (PyDict_SetItemString(config, "Py_NOGIL", py_nogil) < 0) {
+        Py_DECREF(config);
+        return NULL;
+    }
+
+    return config;  // "Py_NOGIL" is set on every platform
+}
+
+PyDoc_STRVAR(sysconfig__doc__,
+"A helper for the sysconfig module.");
+
+static struct PyMethodDef sysconfig_methods[] = {
+    _SYSCONFIG_CONFIG_VARS_METHODDEF
+    {NULL, NULL}
+};
+
+static PyModuleDef_Slot sysconfig_slots[] = {
+    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
+    {0, NULL}
+};
+
+static PyModuleDef sysconfig_module = {
+    .m_base = PyModuleDef_HEAD_INIT,
+    .m_name = "_sysconfig",
+    .m_doc = sysconfig__doc__,
+    .m_methods = sysconfig_methods,
+    .m_slots = sysconfig_slots,
+};
+
+PyMODINIT_FUNC
+PyInit__sysconfig(void)
+{
+    return PyModuleDef_Init(&sysconfig_module);  // multi-phase initialization
+}
diff --git a/Modules/_testcapi/heaptype.c b/Modules/_testcapi/heaptype.c
index d14a1763184207..4526583a8059d9 100644
--- a/Modules/_testcapi/heaptype.c
+++ b/Modules/_testcapi/heaptype.c
@@ -805,13 +805,13 @@ static int
 heapmanaged_traverse(HeapCTypeObject *self, visitproc visit, void *arg)
 {
     Py_VISIT(Py_TYPE(self));
-    return _PyObject_VisitManagedDict((PyObject *)self, visit, arg);
+    return PyObject_VisitManagedDict((PyObject *)self, visit, arg);
 }
 
 static int
 heapmanaged_clear(HeapCTypeObject *self)
 {
-    _PyObject_ClearManagedDict((PyObject *)self);
+    PyObject_ClearManagedDict((PyObject *)self);
     return 0;
 }
 
@@ -819,7 +819,7 @@ static void
 heapmanaged_dealloc(HeapCTypeObject *self)
 {
     PyTypeObject *tp = Py_TYPE(self);
-    _PyObject_ClearManagedDict((PyObject *)self);
+    PyObject_ClearManagedDict((PyObject *)self);
     PyObject_GC_UnTrack(self);
     PyObject_GC_Del(self);
     Py_DECREF(tp);
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
index e09fd8806d2f64..a46d986c18ecd4 100644
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -2458,8 +2458,8 @@ test_tstate_capi(PyObject *self, PyObject *Py_UNUSED(args))
     PyThreadState *tstate2 = PyThreadState_Get();
     assert(tstate2 == tstate);
 
-    // private _PyThreadState_UncheckedGet()
-    PyThreadState *tstate3 = _PyThreadState_UncheckedGet();
+    // PyThreadState_GetUnchecked()
+    PyThreadState *tstate3 = PyThreadState_GetUnchecked();
     assert(tstate3 == tstate);
 
     // PyThreadState_EnterTracing(), PyThreadState_LeaveTracing()
@@ -2923,7 +2923,7 @@ settrace_to_error(PyObject *self, PyObject *list)
 static PyObject *
 clear_managed_dict(PyObject *self, PyObject *obj)
 {
-    _PyObject_ClearManagedDict(obj);
+    PyObject_ClearManagedDict(obj);
     Py_RETURN_NONE;
 }
 
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index c6b80fffdec16d..05bac0936b155d 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -675,7 +675,11 @@ record_eval(PyThreadState *tstate, struct _PyInterpreterFrame *f, int exc)
         assert(module != NULL);
         module_state *state = get_module_state(module);
         Py_DECREF(module);
-        PyList_Append(state->record_list, ((PyFunctionObject *)f->f_funcobj)->func_name);
+        int res = PyList_Append(state->record_list,
+                                ((PyFunctionObject *)f->f_funcobj)->func_name);
+        if (res < 0) {
+            return NULL;
+        }
     }
     return _PyEval_EvalFrameDefault(tstate, f, exc);
 }
diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c
index e77e30dfe5e821..86bd560b92ba6b 100644
--- a/Modules/_threadmodule.c
+++ b/Modules/_threadmodule.c
@@ -1205,7 +1205,7 @@ thread_PyThread_start_new_thread(PyObject *self, PyObject *fargs)
     if (boot == NULL) {
         return PyErr_NoMemory();
     }
-    boot->tstate = _PyThreadState_New(interp);
+    boot->tstate = _PyThreadState_New(interp, _PyThreadState_WHENCE_THREADING);
     if (boot->tstate == NULL) {
         PyMem_RawFree(boot);
         if (!PyErr_Occurred()) {
@@ -1605,6 +1605,18 @@ PyDoc_STRVAR(excepthook_doc,
 \n\
 Handle uncaught Thread.run() exception.");
 
+static PyObject *
+thread__is_main_interpreter(PyObject *module, PyObject *Py_UNUSED(ignored))
+{
+    PyInterpreterState *interp = _PyInterpreterState_GET();  // current interp
+    return PyBool_FromLong(_Py_IsMainInterpreter(interp));  // Python bool
+}
+
+PyDoc_STRVAR(thread__is_main_interpreter_doc,
+"_is_main_interpreter()\n\
+\n\
+Return True if the current interpreter is the main Python interpreter.");
+
 static PyMethodDef thread_methods[] = {
     {"start_new_thread",        (PyCFunction)thread_PyThread_start_new_thread,
      METH_VARARGS, start_new_doc},
@@ -1634,8 +1646,10 @@ static PyMethodDef thread_methods[] = {
      METH_VARARGS, stack_size_doc},
     {"_set_sentinel",           thread__set_sentinel,
      METH_NOARGS, _set_sentinel_doc},
-    {"_excepthook",              thread_excepthook,
+    {"_excepthook",             thread_excepthook,
      METH_O, excepthook_doc},
+    {"_is_main_interpreter",    thread__is_main_interpreter,
+     METH_NOARGS, thread__is_main_interpreter_doc},
     {NULL,                      NULL}           /* sentinel */
 };
 
diff --git a/Modules/_xxinterpchannelsmodule.c b/Modules/_xxinterpchannelsmodule.c
index 6096f88421a73a..d762f449c407a3 100644
--- a/Modules/_xxinterpchannelsmodule.c
+++ b/Modules/_xxinterpchannelsmodule.c
@@ -198,6 +198,9 @@ _release_xid_data(_PyCrossInterpreterData *data, int flags)
 /* module state *************************************************************/
 
 typedef struct {
+    PyTypeObject *send_channel_type;
+    PyTypeObject *recv_channel_type;
+
     /* heap types */
     PyTypeObject *ChannelIDType;
 
@@ -218,6 +221,21 @@ get_module_state(PyObject *mod)
     return state;
 }
 
+static module_state *
+_get_current_module_state(void)
+{
+    PyObject *mod = _get_current_module();
+    if (mod == NULL) {
+        // XXX import it?
+        PyErr_SetString(PyExc_RuntimeError,
+                        MODULE_NAME " module not imported yet");
+        return NULL;
+    }
+    module_state *state = get_module_state(mod);
+    Py_DECREF(mod);  // NOTE(review): assumes something else keeps mod alive
+    return state;  // pointer into the module's state, not a new reference
+}
+
 static int
 traverse_module_state(module_state *state, visitproc visit, void *arg)
 {
@@ -237,6 +255,9 @@ traverse_module_state(module_state *state, visitproc visit, void *arg)
 static int
 clear_module_state(module_state *state)
 {
+    Py_CLEAR(state->send_channel_type);
+    Py_CLEAR(state->recv_channel_type);
+
     /* heap types */
     if (state->ChannelIDType != NULL) {
         (void)_PyCrossInterpreterData_UnregisterClass(state->ChannelIDType);
@@ -1529,17 +1550,20 @@ typedef struct channelid {
 struct channel_id_converter_data {
     PyObject *module;
     int64_t cid;
+    int end;
 };
 
 static int
 channel_id_converter(PyObject *arg, void *ptr)
 {
     int64_t cid;
+    int end = 0;
     struct channel_id_converter_data *data = ptr;
     module_state *state = get_module_state(data->module);
     assert(state != NULL);
     if (PyObject_TypeCheck(arg, state->ChannelIDType)) {
         cid = ((channelid *)arg)->id;
+        end = ((channelid *)arg)->end;
     }
     else if (PyIndex_Check(arg)) {
         cid = PyLong_AsLongLong(arg);
@@ -1559,6 +1583,7 @@ channel_id_converter(PyObject *arg, void *ptr)
         return 0;
     }
     data->cid = cid;
+    data->end = end;
     return 1;
 }
 
@@ -1600,6 +1625,7 @@ _channelid_new(PyObject *mod, PyTypeObject *cls,
 {
     static char *kwlist[] = {"id", "send", "recv", "force", "_resolve", NULL};
     int64_t cid;
+    int end;
     struct channel_id_converter_data cid_data = {
         .module = mod,
     };
@@ -1614,6 +1640,7 @@ _channelid_new(PyObject *mod, PyTypeObject *cls,
         return NULL;
     }
     cid = cid_data.cid;
+    end = cid_data.end;
 
     // Handle "send" and "recv".
     if (send == 0 && recv == 0) {
@@ -1621,14 +1648,17 @@ _channelid_new(PyObject *mod, PyTypeObject *cls,
                         "'send' and 'recv' cannot both be False");
         return NULL;
     }
-
-    int end = 0;
-    if (send == 1) {
+    else if (send == 1) {
         if (recv == 0 || recv == -1) {
             end = CHANNEL_SEND;
         }
+        else {
+            assert(recv == 1);
+            end = 0;
+        }
     }
     else if (recv == 1) {
+        assert(send == 0 || send == -1);
         end = CHANNEL_RECV;
     }
 
@@ -1773,21 +1803,12 @@ channelid_richcompare(PyObject *self, PyObject *other, int op)
     return res;
 }
 
+static PyTypeObject * _get_current_channel_end_type(int end);
+
 static PyObject *
 _channel_from_cid(PyObject *cid, int end)
 {
-    PyObject *highlevel = PyImport_ImportModule("interpreters");
-    if (highlevel == NULL) {
-        PyErr_Clear();
-        highlevel = PyImport_ImportModule("test.support.interpreters");
-        if (highlevel == NULL) {
-            return NULL;
-        }
-    }
-    const char *clsname = (end == CHANNEL_RECV) ? "RecvChannel" :
-                                                  "SendChannel";
-    PyObject *cls = PyObject_GetAttrString(highlevel, clsname);
-    Py_DECREF(highlevel);
+    PyObject *cls = (PyObject *)_get_current_channel_end_type(end);
     if (cls == NULL) {
         return NULL;
     }
@@ -1943,6 +1964,107 @@ static PyType_Spec ChannelIDType_spec = {
 };
 
 
+/* SendChannel and RecvChannel classes */
+
+// XXX Use a new __xid__ protocol instead?
+
+static PyTypeObject *
+_get_current_channel_end_type(int end)
+{
+    module_state *state = _get_current_module_state();
+    if (state == NULL) {
+        return NULL;
+    }
+    PyTypeObject *cls;
+    if (end == CHANNEL_SEND) {
+        cls = state->send_channel_type;
+    }
+    else {
+        assert(end == CHANNEL_RECV);
+        cls = state->recv_channel_type;
+    }
+    if (cls == NULL) {  // nothing registered yet for this end
+        PyObject *highlevel = PyImport_ImportModule("interpreters");
+        if (highlevel == NULL) {
+            PyErr_Clear();  // try the test-support copy instead
+            highlevel = PyImport_ImportModule("test.support.interpreters");
+            if (highlevel == NULL) {
+                return NULL;
+            }
+        }
+        Py_DECREF(highlevel);  // presumably the import registers the types
+        if (end == CHANNEL_SEND) {
+            cls = state->send_channel_type;
+        }
+        else {
+            cls = state->recv_channel_type;
+        }
+        assert(cls != NULL);  // NOTE(review): unchecked if asserts are off
+    }
+    return cls;  // no new reference is taken here
+}
+
+static PyObject *
+_channel_end_from_xid(_PyCrossInterpreterData *data)
+{
+    channelid *cid = (channelid *)_channelid_from_xid(data);
+    if (cid == NULL) {
+        return NULL;
+    }
+    PyTypeObject *cls = _get_current_channel_end_type(cid->end);
+    if (cls == NULL) {
+        Py_DECREF(cid);  // error path: drop the ChannelID built above
+        return NULL;
+    }
+    PyObject *obj = PyObject_CallOneArg((PyObject *)cls, (PyObject *)cid);
+    Py_DECREF(cid);  // the local reference is no longer needed
+    return obj;  // cls(cid), or NULL if the call failed
+}
+
+static int
+_channel_end_shared(PyThreadState *tstate, PyObject *obj,
+                    _PyCrossInterpreterData *data)
+{
+    PyObject *cidobj = PyObject_GetAttrString(obj, "_id");  // obj._id
+    if (cidobj == NULL) {
+        return -1;
+    }
+    int res = _channelid_shared(tstate, cidobj, data);  // fill data from _id
+    Py_DECREF(cidobj);
+    if (res < 0) {
+        return -1;
+    }
+    data->new_object = _channel_end_from_xid;  // set after data is filled
+    return 0;
+}
+
+static int
+set_channel_end_types(PyObject *mod, PyTypeObject *send, PyTypeObject *recv)
+{
+    module_state *state = get_module_state(mod);
+    if (state == NULL) {
+        return -1;
+    }
+
+    if (state->send_channel_type != NULL
+        || state->recv_channel_type != NULL)
+    {
+        PyErr_SetString(PyExc_TypeError, "already registered");
+        return -1;  // the types can be set only once per module state
+    }
+    state->send_channel_type = (PyTypeObject *)Py_NewRef(send);
+    state->recv_channel_type = (PyTypeObject *)Py_NewRef(recv);
+
+    if (_PyCrossInterpreterData_RegisterClass(send, _channel_end_shared)) {
+        return -1;  // NOTE(review): state keeps send/recv set on this path
+    }
+    if (_PyCrossInterpreterData_RegisterClass(recv, _channel_end_shared)) {
+        return -1;  // NOTE(review): send is left registered on this path
+    }
+
+    return 0;
+}
+
 /* module level code ********************************************************/
 
 /* globals is the process-global state for the module.  It holds all
@@ -2346,13 +2468,41 @@ channel__channel_id(PyObject *self, PyObject *args, PyObject *kwds)
         return NULL;
     }
     PyTypeObject *cls = state->ChannelIDType;
+
     PyObject *mod = get_module_from_owned_type(cls);
-    if (mod == NULL) {
+    assert(mod == self);
+    Py_DECREF(mod);
+
+    return _channelid_new(self, cls, args, kwds);
+}
+
+static PyObject *
+channel__register_end_types(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    static char *kwlist[] = {"send", "recv", NULL};
+    PyObject *send;
+    PyObject *recv;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds,
+                                     "OO:_register_end_types", kwlist,
+                                     &send, &recv)) {
         return NULL;
     }
-    PyObject *cid = _channelid_new(mod, cls, args, kwds);
-    Py_DECREF(mod);
-    return cid;
+    if (!PyType_Check(send)) {
+        PyErr_SetString(PyExc_TypeError, "expected a type for 'send'");
+        return NULL;
+    }
+    if (!PyType_Check(recv)) {
+        PyErr_SetString(PyExc_TypeError, "expected a type for 'recv'");
+        return NULL;
+    }
+    PyTypeObject *cls_send = (PyTypeObject *)send;
+    PyTypeObject *cls_recv = (PyTypeObject *)recv;
+
+    if (set_channel_end_types(self, cls_send, cls_recv) < 0) {
+        return NULL;
+    }
+
+    Py_RETURN_NONE;
 }
 
 static PyMethodDef module_functions[] = {
@@ -2374,6 +2524,8 @@ static PyMethodDef module_functions[] = {
      METH_VARARGS | METH_KEYWORDS, channel_release_doc},
     {"_channel_id",               _PyCFunction_CAST(channel__channel_id),
      METH_VARARGS | METH_KEYWORDS, NULL},
+    {"_register_end_types",       _PyCFunction_CAST(channel__register_end_types),
+     METH_VARARGS | METH_KEYWORDS, NULL},
 
     {NULL,                        NULL}           /* sentinel */
 };
diff --git a/Modules/_xxsubinterpretersmodule.c b/Modules/_xxsubinterpretersmodule.c
index 1ddf64909bf18a..bca16ac8a62eca 100644
--- a/Modules/_xxsubinterpretersmodule.c
+++ b/Modules/_xxsubinterpretersmodule.c
@@ -8,6 +8,7 @@
 #include "Python.h"
 #include "pycore_initconfig.h"    // _PyErr_SetFromPyStatus()
 #include "pycore_pyerrors.h"      // _PyErr_ChainExceptions1()
+#include "pycore_pystate.h"       // _PyInterpreterState_SetRunningMain()
 #include "interpreteridobject.h"
 
 
@@ -241,6 +242,11 @@ _sharedns_apply(_sharedns *shared, PyObject *ns)
 // of the exception in the calling interpreter.
 
 typedef struct _sharedexception {
+    PyInterpreterState *interp;
+#define ERR_NOT_SET 0
+#define ERR_NO_MEMORY 1
+#define ERR_ALREADY_RUNNING 2
+    int code;
     const char *name;
     const char *msg;
 } _sharedexception;
@@ -262,14 +268,26 @@ _sharedexception_clear(_sharedexception *exc)
 }
 
 static const char *
-_sharedexception_bind(PyObject *exc, _sharedexception *sharedexc)
+_sharedexception_bind(PyObject *exc, int code, _sharedexception *sharedexc)
 {
+    if (sharedexc->interp == NULL) {
+        sharedexc->interp = PyInterpreterState_Get();
+    }
+
+    if (code != ERR_NOT_SET) {
+        assert(exc == NULL);
+        assert(code > 0);
+        sharedexc->code = code;
+        return NULL;
+    }
+
     assert(exc != NULL);
     const char *failure = NULL;
 
     PyObject *nameobj = PyUnicode_FromString(Py_TYPE(exc)->tp_name);
     if (nameobj == NULL) {
         failure = "unable to format exception type name";
+        code = ERR_NO_MEMORY;
         goto error;
     }
     sharedexc->name = _copy_raw_string(nameobj);
@@ -280,6 +298,7 @@ _sharedexception_bind(PyObject *exc, _sharedexception *sharedexc)
         } else {
             failure = "unable to encode and copy exception type name";
         }
+        code = ERR_NO_MEMORY;
         goto error;
     }
 
@@ -287,6 +306,7 @@ _sharedexception_bind(PyObject *exc, _sharedexception *sharedexc)
         PyObject *msgobj = PyUnicode_FromFormat("%S", exc);
         if (msgobj == NULL) {
             failure = "unable to format exception message";
+            code = ERR_NO_MEMORY;
             goto error;
         }
         sharedexc->msg = _copy_raw_string(msgobj);
@@ -297,6 +317,7 @@ _sharedexception_bind(PyObject *exc, _sharedexception *sharedexc)
             } else {
                 failure = "unable to encode and copy exception message";
             }
+            code = ERR_NO_MEMORY;
             goto error;
         }
     }
@@ -307,7 +328,10 @@ _sharedexception_bind(PyObject *exc, _sharedexception *sharedexc)
     assert(failure != NULL);
     PyErr_Clear();
     _sharedexception_clear(sharedexc);
-    *sharedexc = no_exception;
+    *sharedexc = (_sharedexception){
+        .interp = sharedexc->interp,
+        .code = code,
+    };
     return failure;
 }
 
@@ -315,6 +339,7 @@ static void
 _sharedexception_apply(_sharedexception *exc, PyObject *wrapperclass)
 {
     if (exc->name != NULL) {
+        assert(exc->code == ERR_NOT_SET);
         if (exc->msg != NULL) {
             PyErr_Format(wrapperclass, "%s: %s",  exc->name, exc->msg);
         }
@@ -323,9 +348,19 @@ _sharedexception_apply(_sharedexception *exc, PyObject *wrapperclass)
         }
     }
     else if (exc->msg != NULL) {
+        assert(exc->code == ERR_NOT_SET);
         PyErr_SetString(wrapperclass, exc->msg);
     }
+    else if (exc->code == ERR_NO_MEMORY) {
+        PyErr_NoMemory();
+    }
+    else if (exc->code == ERR_ALREADY_RUNNING) {
+        assert(exc->interp != NULL);
+        assert(_PyInterpreterState_IsRunningMain(exc->interp));
+        _PyInterpreterState_FailIfRunningMain(exc->interp);
+    }
     else {
+        assert(exc->code == ERR_NOT_SET);
         PyErr_SetNone(wrapperclass);
     }
 }
@@ -358,41 +393,21 @@ exceptions_init(PyObject *mod)
 }
 
 static int
-_is_running(PyInterpreterState *interp)
+_run_script(PyInterpreterState *interp, const char *codestr,
+            _sharedns *shared, _sharedexception *sharedexc)
 {
-    PyThreadState *tstate = PyInterpreterState_ThreadHead(interp);
-    if (PyThreadState_Next(tstate) != NULL) {
-        PyErr_SetString(PyExc_RuntimeError,
-                        "interpreter has more than one thread");
-        return -1;
-    }
-
-    assert(!PyErr_Occurred());
-    struct _PyInterpreterFrame *frame = tstate->current_frame;
-    if (frame == NULL) {
-        return 0;
-    }
-    return 1;
-}
+    int errcode = ERR_NOT_SET;
 
-static int
-_ensure_not_running(PyInterpreterState *interp)
-{
-    int is_running = _is_running(interp);
-    if (is_running < 0) {
-        return -1;
-    }
-    if (is_running) {
-        PyErr_Format(PyExc_RuntimeError, "interpreter already running");
-        return -1;
+    if (_PyInterpreterState_SetRunningMain(interp) < 0) {
+        assert(PyErr_Occurred());
+        // In the case where we didn't switch interpreters, it would
+        // be more efficient to leave the exception in place and return
+        // immediately.  However, life is simpler if we don't.
+        PyErr_Clear();
+        errcode = ERR_ALREADY_RUNNING;
+        goto error;
     }
-    return 0;
-}
 
-static int
-_run_script(PyInterpreterState *interp, const char *codestr,
-            _sharedns *shared, _sharedexception *sharedexc)
-{
     PyObject *excval = NULL;
     PyObject *main_mod = PyUnstable_InterpreterState_GetMainModule(interp);
     if (main_mod == NULL) {
@@ -422,20 +437,29 @@ _run_script(PyInterpreterState *interp, const char *codestr,
     else {
         Py_DECREF(result);  // We throw away the result.
     }
+    _PyInterpreterState_SetNotRunningMain(interp);
 
     *sharedexc = no_exception;
     return 0;
 
 error:
     excval = PyErr_GetRaisedException();
-    const char *failure = _sharedexception_bind(excval, sharedexc);
+    const char *failure = _sharedexception_bind(excval, errcode, sharedexc);
     if (failure != NULL) {
         fprintf(stderr,
                 "RunFailedError: script raised an uncaught exception (%s)",
                 failure);
-        PyErr_Clear();
     }
-    Py_XDECREF(excval);
+    if (excval != NULL) {
+        // XXX Instead, store the rendered traceback on sharedexc,
+        // attach it to the exception when applied,
+        // and teach PyErr_Display() to print it.
+        PyErr_Display(NULL, excval, NULL);
+        Py_DECREF(excval);
+    }
+    if (errcode != ERR_ALREADY_RUNNING) {
+        _PyInterpreterState_SetNotRunningMain(interp);
+    }
     assert(!PyErr_Occurred());
     return -1;
 }
@@ -444,10 +468,8 @@ static int
 _run_script_in_interpreter(PyObject *mod, PyInterpreterState *interp,
                            const char *codestr, PyObject *shareables)
 {
-    if (_ensure_not_running(interp) < 0) {
-        return -1;
-    }
     module_state *state = get_module_state(mod);
+    assert(state != NULL);
 
     _sharedns *shared = _get_shared_ns(shareables);
     if (shared == NULL && PyErr_Occurred()) {
@@ -456,30 +478,30 @@ _run_script_in_interpreter(PyObject *mod, PyInterpreterState *interp,
 
     // Switch to interpreter.
     PyThreadState *save_tstate = NULL;
+    PyThreadState *tstate = NULL;
     if (interp != PyInterpreterState_Get()) {
-        // XXX Using the "head" thread isn't strictly correct.
-        PyThreadState *tstate = PyInterpreterState_ThreadHead(interp);
+        tstate = PyThreadState_New(interp);
+        tstate->_whence = _PyThreadState_WHENCE_EXEC;
         // XXX Possible GILState issues?
         save_tstate = PyThreadState_Swap(tstate);
     }
 
     // Run the script.
-    _sharedexception exc = {NULL, NULL};
+    _sharedexception exc = (_sharedexception){ .interp = interp };
     int result = _run_script(interp, codestr, shared, &exc);
 
     // Switch back.
     if (save_tstate != NULL) {
+        PyThreadState_Clear(tstate);
         PyThreadState_Swap(save_tstate);
+        PyThreadState_Delete(tstate);
     }
 
     // Propagate any exception out to the caller.
-    if (exc.name != NULL) {
-        assert(state != NULL);
+    if (result < 0) {
+        assert(!PyErr_Occurred());
         _sharedexception_apply(&exc, state->RunFailedError);
-    }
-    else if (result != 0) {
-        // We were unable to allocate a shared exception.
-        PyErr_NoMemory();
+        assert(PyErr_Occurred());
     }
 
     if (shared != NULL) {
@@ -509,6 +531,7 @@ interp_create(PyObject *self, PyObject *args, PyObject *kwds)
     const PyInterpreterConfig config = isolated
         ? (PyInterpreterConfig)_PyInterpreterConfig_INIT
         : (PyInterpreterConfig)_PyInterpreterConfig_LEGACY_INIT;
+
     // XXX Possible GILState issues?
     PyThreadState *tstate = NULL;
     PyStatus status = Py_NewInterpreterFromConfig(&tstate, &config);
@@ -524,6 +547,7 @@ interp_create(PyObject *self, PyObject *args, PyObject *kwds)
         return NULL;
     }
     assert(tstate != NULL);
+
     PyInterpreterState *interp = PyThreadState_GetInterpreter(tstate);
     PyObject *idobj = PyInterpreterState_GetIDObject(interp);
     if (idobj == NULL) {
@@ -533,6 +557,10 @@ interp_create(PyObject *self, PyObject *args, PyObject *kwds)
         PyThreadState_Swap(save_tstate);
         return NULL;
     }
+
+    PyThreadState_Clear(tstate);
+    PyThreadState_Delete(tstate);
+
     _PyInterpreterState_RequireIDRef(interp, 1);
     return idobj;
 }
@@ -574,12 +602,14 @@ interp_destroy(PyObject *self, PyObject *args, PyObject *kwds)
     // Ensure the interpreter isn't running.
     /* XXX We *could* support destroying a running interpreter but
        aren't going to worry about it for now. */
-    if (_ensure_not_running(interp) < 0) {
+    if (_PyInterpreterState_IsRunningMain(interp)) {
+        PyErr_Format(PyExc_RuntimeError, "interpreter running");
         return NULL;
     }
 
     // Destroy the interpreter.
-    PyThreadState *tstate = PyInterpreterState_ThreadHead(interp);
+    PyThreadState *tstate = PyThreadState_New(interp);
+    tstate->_whence = _PyThreadState_WHENCE_INTERP;
     // XXX Possible GILState issues?
     PyThreadState *save_tstate = PyThreadState_Swap(tstate);
     Py_EndInterpreter(tstate);
@@ -748,11 +778,7 @@ interp_is_running(PyObject *self, PyObject *args, PyObject *kwds)
     if (interp == NULL) {
         return NULL;
     }
-    int is_running = _is_running(interp);
-    if (is_running < 0) {
-        return NULL;
-    }
-    if (is_running) {
+    if (_PyInterpreterState_IsRunningMain(interp)) {
         Py_RETURN_TRUE;
     }
     Py_RETURN_FALSE;
@@ -763,6 +789,7 @@ PyDoc_STRVAR(is_running_doc,
 \n\
 Return whether or not the identified interpreter is running.");
 
+
 static PyMethodDef module_functions[] = {
     {"create",                    _PyCFunction_CAST(interp_create),
      METH_VARARGS | METH_KEYWORDS, create_doc},
diff --git a/Modules/clinic/_sysconfig.c.h b/Modules/clinic/_sysconfig.c.h
new file mode 100644
index 00000000000000..eb3d396298bb21
--- /dev/null
+++ b/Modules/clinic/_sysconfig.c.h
@@ -0,0 +1,22 @@
+/*[clinic input]
+preserve
+[clinic start generated code]*/
+
+PyDoc_STRVAR(_sysconfig_config_vars__doc__,
+"config_vars($module, /)\n"
+"--\n"
+"\n"
+"Returns a dictionary containing build variables intended to be exposed by sysconfig.");
+
+#define _SYSCONFIG_CONFIG_VARS_METHODDEF    \
+    {"config_vars", (PyCFunction)_sysconfig_config_vars, METH_NOARGS, _sysconfig_config_vars__doc__},
+
+static PyObject *
+_sysconfig_config_vars_impl(PyObject *module);
+
+static PyObject *
+_sysconfig_config_vars(PyObject *module, PyObject *Py_UNUSED(ignored))
+{
+    return _sysconfig_config_vars_impl(module);
+}
+/*[clinic end generated code: output=25d395cf02eced1f input=a9049054013a1b77]*/
diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h
index e77a31b947f45e..0238d3a2f23149 100644
--- a/Modules/clinic/posixmodule.c.h
+++ b/Modules/clinic/posixmodule.c.h
@@ -1848,6 +1848,40 @@ os__getfinalpathname(PyObject *module, PyObject *arg)
 
 #if defined(MS_WINDOWS)
 
+PyDoc_STRVAR(os__findfirstfile__doc__,
+"_findfirstfile($module, path, /)\n"
+"--\n"
+"\n"
+"A function to get the real file name without accessing the file in Windows.");
+
+#define OS__FINDFIRSTFILE_METHODDEF    \
+    {"_findfirstfile", (PyCFunction)os__findfirstfile, METH_O, os__findfirstfile__doc__},
+
+static PyObject *
+os__findfirstfile_impl(PyObject *module, path_t *path);
+
+static PyObject *
+os__findfirstfile(PyObject *module, PyObject *arg)
+{
+    PyObject *return_value = NULL;
+    path_t path = PATH_T_INITIALIZE("_findfirstfile", "path", 0, 0);
+
+    if (!path_converter(arg, &path)) {
+        goto exit;
+    }
+    return_value = os__findfirstfile_impl(module, &path);
+
+exit:
+    /* Cleanup for path */
+    path_cleanup(&path);
+
+    return return_value;
+}
+
+#endif /* defined(MS_WINDOWS) */
+
+#if defined(MS_WINDOWS)
+
 PyDoc_STRVAR(os__getvolumepathname__doc__,
 "_getvolumepathname($module, /, path)\n"
 "--\n"
@@ -10425,11 +10459,9 @@ PyDoc_STRVAR(os_cpu_count__doc__,
 "cpu_count($module, /)\n"
 "--\n"
 "\n"
-"Return the number of CPUs in the system; return None if indeterminable.\n"
+"Return the number of logical CPUs in the system.\n"
 "\n"
-"This number is not equivalent to the number of CPUs the current process can\n"
-"use.  The number of usable CPUs can be obtained with\n"
-"``len(os.sched_getaffinity(0))``");
+"Return None if indeterminable.");
 
 #define OS_CPU_COUNT_METHODDEF    \
     {"cpu_count", (PyCFunction)os_cpu_count, METH_NOARGS, os_cpu_count__doc__},
@@ -11453,6 +11485,10 @@ os_waitstatus_to_exitcode(PyObject *module, PyObject *const *args, Py_ssize_t na
     #define OS__GETFINALPATHNAME_METHODDEF
 #endif /* !defined(OS__GETFINALPATHNAME_METHODDEF) */
 
+#ifndef OS__FINDFIRSTFILE_METHODDEF
+    #define OS__FINDFIRSTFILE_METHODDEF
+#endif /* !defined(OS__FINDFIRSTFILE_METHODDEF) */
+
 #ifndef OS__GETVOLUMEPATHNAME_METHODDEF
     #define OS__GETVOLUMEPATHNAME_METHODDEF
 #endif /* !defined(OS__GETVOLUMEPATHNAME_METHODDEF) */
@@ -11988,4 +12024,4 @@ os_waitstatus_to_exitcode(PyObject *module, PyObject *const *args, Py_ssize_t na
 #ifndef OS_WAITSTATUS_TO_EXITCODE_METHODDEF
     #define OS_WAITSTATUS_TO_EXITCODE_METHODDEF
 #endif /* !defined(OS_WAITSTATUS_TO_EXITCODE_METHODDEF) */
-/*[clinic end generated code: output=51aa26bc6a41e1da input=a9049054013a1b77]*/
+/*[clinic end generated code: output=a36904281a8a7507 input=a9049054013a1b77]*/
diff --git a/Modules/faulthandler.c b/Modules/faulthandler.c
index 4b6bf68be07202..a2e3c2300b3ce8 100644
--- a/Modules/faulthandler.c
+++ b/Modules/faulthandler.c
@@ -6,8 +6,10 @@
 #include "pycore_sysmodule.h"     // _PySys_GetAttr()
 #include "pycore_traceback.h"     // _Py_DumpTracebackThreads
 
-#include <object.h>
-#include <signal.h>
+#ifdef HAVE_UNISTD_H
+#  include <unistd.h>             // _exit()
+#endif
+#include <signal.h>               // sigaction()
 #include <stdlib.h>               // abort()
 #if defined(HAVE_PTHREAD_SIGMASK) && !defined(HAVE_BROKEN_PTHREAD_SIGMASK) && defined(HAVE_PTHREAD_H)
 #  include <pthread.h>
@@ -16,7 +18,7 @@
 #  include <windows.h>
 #endif
 #ifdef HAVE_SYS_RESOURCE_H
-#  include <sys/resource.h>
+#  include <sys/resource.h>       // setrlimit()
 #endif
 
 #if defined(FAULTHANDLER_USE_ALT_STACK) && defined(HAVE_LINUX_AUXVEC_H) && defined(HAVE_SYS_AUXV_H)
@@ -24,6 +26,7 @@
 #  include <sys/auxv.h>           // getauxval()
 #endif
 
+
 /* Allocate at maximum 100 MiB of the stack to raise the stack overflow */
 #define STACK_OVERFLOW_MAX_SIZE (100 * 1024 * 1024)
 
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index 632cabdf4bcfbd..592d527f0bd6a2 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -24,6 +24,7 @@
 */
 
 #include "Python.h"
+#include "pycore_ceval.h"         // _Py_set_eval_breaker_bit()
 #include "pycore_context.h"
 #include "pycore_dict.h"          // _PyDict_MaybeUntrack()
 #include "pycore_initconfig.h"
@@ -2274,11 +2275,7 @@ _Py_ScheduleGC(PyInterpreterState *interp)
     if (gcstate->collecting == 1) {
         return;
     }
-    struct _ceval_state *ceval = &interp->ceval;
-    if (!_Py_atomic_load_relaxed(&ceval->gc_scheduled)) {
-        _Py_atomic_store_relaxed(&ceval->gc_scheduled, 1);
-        _Py_atomic_store_relaxed(&ceval->eval_breaker, 1);
-    }
+    _Py_set_eval_breaker_bit(interp, _PY_GC_SCHEDULED_BIT, 1);
 }
 
 void
diff --git a/Modules/getpath.c b/Modules/getpath.c
index 3b926cac0d3f24..6f76a84e78bf62 100644
--- a/Modules/getpath.c
+++ b/Modules/getpath.c
@@ -6,6 +6,7 @@
 #include "pycore_pathconfig.h"    // _PyPathConfig_ReadGlobal()
 #include "pycore_pyerrors.h"      // _PyErr_WriteUnraisableMsg()
 #include "pycore_pymem.h"         // _PyMem_RawWcsdup()
+#include "pycore_pystate.h"       // _PyThreadState_GET()
 
 #include "marshal.h"              // PyMarshal_ReadObjectFromString
 #include "osdefs.h"               // DELIM
@@ -821,7 +822,7 @@ _PyConfig_InitPathConfig(PyConfig *config, int compute_path_config)
         return status;
     }
 
-    if (!_PyThreadState_UncheckedGet()) {
+    if (!_PyThreadState_GET()) {
         return PyStatus_Error("cannot calculate path configuration without GIL");
     }
 
diff --git a/Modules/main.c b/Modules/main.c
index 05bedff050699f..b5ee34d0141daf 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -556,6 +556,11 @@ pymain_run_python(int *exitcode)
         goto error;
     }
 
+    // XXX Calculate config->sys_path_0 in getpath.py.
+    // The tricky part is that we can't check the path importers yet
+    // at that point.
+    assert(config->sys_path_0 == NULL);
+
     if (config->run_filename != NULL) {
         /* If filename is a package (ex: directory or ZIP file) which contains
            __main__.py, main_importer_path is set to filename and will be
@@ -571,29 +576,45 @@ pymain_run_python(int *exitcode)
     // import readline and rlcompleter before script dir is added to sys.path
     pymain_import_readline(config);
 
+    PyObject *path0 = NULL;
     if (main_importer_path != NULL) {
-        if (pymain_sys_path_add_path0(interp, main_importer_path) < 0) {
-            goto error;
-        }
+        path0 = Py_NewRef(main_importer_path);
     }
     else if (!config->safe_path) {
-        PyObject *path0 = NULL;
         int res = _PyPathConfig_ComputeSysPath0(&config->argv, &path0);
         if (res < 0) {
             goto error;
         }
-
-        if (res > 0) {
-            if (pymain_sys_path_add_path0(interp, path0) < 0) {
-                Py_DECREF(path0);
-                goto error;
-            }
+        else if (res == 0) {
+            Py_CLEAR(path0);
+        }
+    }
+    // XXX Apply config->sys_path_0 in init_interp_main().  We have
+    // to be sure to get readline/rlcompleter imported at the correct time.
+    if (path0 != NULL) {
+        wchar_t *wstr = PyUnicode_AsWideCharString(path0, NULL);
+        if (wstr == NULL) {
             Py_DECREF(path0);
+            goto error;
+        }
+        config->sys_path_0 = _PyMem_RawWcsdup(wstr);
+        PyMem_Free(wstr);
+        if (config->sys_path_0 == NULL) {
+            Py_DECREF(path0);
+            goto error;
+        }
+        int res = pymain_sys_path_add_path0(interp, path0);
+        Py_DECREF(path0);
+        if (res < 0) {
+            goto error;
         }
     }
 
     pymain_header(config);
 
+    _PyInterpreterState_SetRunningMain(interp);
+    assert(!PyErr_Occurred());
+
     if (config->run_command) {
         *exitcode = pymain_run_command(config->run_command);
     }
@@ -617,6 +638,7 @@ pymain_run_python(int *exitcode)
     *exitcode = pymain_exit_err_print();
 
 done:
+    _PyInterpreterState_SetNotRunningMain(interp);
     Py_XDECREF(main_importer_path);
 }
 
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c
index d7d3e365d2c553..2c32a45a53277f 100644
--- a/Modules/posixmodule.c
+++ b/Modules/posixmodule.c
@@ -24,6 +24,10 @@
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
 #include "pycore_signal.h"        // Py_NSIG
 
+#ifdef HAVE_UNISTD_H
+#  include <unistd.h>             // symlink()
+#endif
+
 #ifdef MS_WINDOWS
 #  include <windows.h>
 #  if !defined(MS_WINDOWS_GAMES) || defined(MS_WINDOWS_DESKTOP)
@@ -37,7 +41,6 @@
 #  endif /* MS_WINDOWS_DESKTOP | MS_WINDOWS_SYSTEM */
 #endif
 
-
 #ifndef MS_WINDOWS
 #  include "posixmodule.h"
 #else
@@ -285,10 +288,6 @@ corresponding Unix manual entries for more information on calls.");
 #  include <sched.h>
 #endif
 
-#ifdef HAVE_COPY_FILE_RANGE
-#  include <unistd.h>             // copy_file_range()
-#endif
-
 #if !defined(CPU_ALLOC) && defined(HAVE_SCHED_SETAFFINITY)
 #  undef HAVE_SCHED_SETAFFINITY
 #endif
@@ -4810,6 +4809,37 @@ os__getfinalpathname_impl(PyObject *module, path_t *path)
     return result;
 }
 
+/*[clinic input]
+os._findfirstfile
+    path: path_t
+    /
+A function to get the real file name without accessing the file in Windows.
+[clinic start generated code]*/
+
+static PyObject *
+os__findfirstfile_impl(PyObject *module, path_t *path)
+/*[clinic end generated code: output=106dd3f0779c83dd input=0734dff70f60e1a8]*/
+{
+    PyObject *result;
+    HANDLE hFindFile;
+    WIN32_FIND_DATAW wFileData;         // filled in by FindFirstFileW()
+    WCHAR *wRealFileName;               // points into wFileData, not owned
+
+    Py_BEGIN_ALLOW_THREADS
+    hFindFile = FindFirstFileW(path->wide, &wFileData);
+    Py_END_ALLOW_THREADS
+
+    if (hFindFile == INVALID_HANDLE_VALUE) {
+        path_error(path);               // presumably raises OSError for path
+        return NULL;                    // no valid handle, nothing to close
+    }
+
+    wRealFileName = wFileData.cFileName;
+    result = PyUnicode_FromWideChar(wRealFileName, wcslen(wRealFileName));
+    FindClose(hFindFile);               // closed whether or not result is NULL
+    return result;                      // str with the entry's name, or NULL
+}
+
 
 /*[clinic input]
 os._getvolumepathname
@@ -8134,39 +8164,45 @@ static PyObject *
 os_sched_getaffinity_impl(PyObject *module, pid_t pid)
 /*[clinic end generated code: output=f726f2c193c17a4f input=983ce7cb4a565980]*/
 {
-    int cpu, ncpus, count;
+    int ncpus = NCPUS_START;
     size_t setsize;
-    cpu_set_t *mask = NULL;
-    PyObject *res = NULL;
+    cpu_set_t *mask;
 
-    ncpus = NCPUS_START;
     while (1) {
         setsize = CPU_ALLOC_SIZE(ncpus);
         mask = CPU_ALLOC(ncpus);
-        if (mask == NULL)
+        if (mask == NULL) {
             return PyErr_NoMemory();
-        if (sched_getaffinity(pid, setsize, mask) == 0)
+        }
+        if (sched_getaffinity(pid, setsize, mask) == 0) {
             break;
+        }
         CPU_FREE(mask);
-        if (errno != EINVAL)
+        if (errno != EINVAL) {
             return posix_error();
+        }
         if (ncpus > INT_MAX / 2) {
-            PyErr_SetString(PyExc_OverflowError, "could not allocate "
-                            "a large enough CPU set");
+            PyErr_SetString(PyExc_OverflowError,
+                            "could not allocate a large enough CPU set");
             return NULL;
         }
-        ncpus = ncpus * 2;
+        ncpus *= 2;
     }
 
-    res = PySet_New(NULL);
-    if (res == NULL)
+    PyObject *res = PySet_New(NULL);
+    if (res == NULL) {
         goto error;
-    for (cpu = 0, count = CPU_COUNT_S(setsize, mask); count; cpu++) {
+    }
+
+    int cpu = 0;
+    int count = CPU_COUNT_S(setsize, mask);
+    for (; count; cpu++) {
         if (CPU_ISSET_S(cpu, setsize, mask)) {
             PyObject *cpu_num = PyLong_FromLong(cpu);
             --count;
-            if (cpu_num == NULL)
+            if (cpu_num == NULL) {
                 goto error;
+            }
             if (PySet_Add(res, cpu_num)) {
                 Py_DECREF(cpu_num);
                 goto error;
@@ -8178,12 +8214,12 @@ os_sched_getaffinity_impl(PyObject *module, pid_t pid)
     return res;
 
 error:
-    if (mask)
+    if (mask) {
         CPU_FREE(mask);
+    }
     Py_XDECREF(res);
     return NULL;
 }
-
 #endif /* HAVE_SCHED_SETAFFINITY */
 
 #endif /* HAVE_SCHED_H */
@@ -14334,44 +14370,49 @@ os_get_terminal_size_impl(PyObject *module, int fd)
 /*[clinic input]
 os.cpu_count
 
-Return the number of CPUs in the system; return None if indeterminable.
+Return the number of logical CPUs in the system.
 
-This number is not equivalent to the number of CPUs the current process can
-use.  The number of usable CPUs can be obtained with
-``len(os.sched_getaffinity(0))``
+Return None if indeterminable.
 [clinic start generated code]*/
 
 static PyObject *
 os_cpu_count_impl(PyObject *module)
-/*[clinic end generated code: output=5fc29463c3936a9c input=e7c8f4ba6dbbadd3]*/
+/*[clinic end generated code: output=5fc29463c3936a9c input=ba2f6f8980a0e2eb]*/
 {
-    int ncpu = 0;
+    int ncpu = 0;  // stays 0 (-> None) if no platform branch below applies
 #ifdef MS_WINDOWS
-#ifdef MS_WINDOWS_DESKTOP
+# ifdef MS_WINDOWS_DESKTOP
     ncpu = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS);
-#endif
+# else
+    ncpu = 0;
+# endif
+
 #elif defined(__hpux)
     ncpu = mpctl(MPC_GETNUMSPUS, NULL, NULL);
+
 #elif defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
     ncpu = sysconf(_SC_NPROCESSORS_ONLN);
+
 #elif defined(__VXWORKS__)
     ncpu = _Py_popcount32(vxCpuEnabledGet());
+
 #elif defined(__DragonFly__) || \
       defined(__OpenBSD__)   || \
       defined(__FreeBSD__)   || \
       defined(__NetBSD__)    || \
       defined(__APPLE__)
-    int mib[2];
+    ncpu = 0;
     size_t len = sizeof(ncpu);
-    mib[0] = CTL_HW;
-    mib[1] = HW_NCPU;
-    if (sysctl(mib, 2, &ncpu, &len, NULL, 0) != 0)
+    int mib[2] = {CTL_HW, HW_NCPU};
+    if (sysctl(mib, 2, &ncpu, &len, NULL, 0) != 0) {
         ncpu = 0;
+    }
 #endif
-    if (ncpu >= 1)
-        return PyLong_FromLong(ncpu);
-    else
+
+    if (ncpu < 1) {  // count is indeterminable: report None, not a bogus int
         Py_RETURN_NONE;
+    }
+    return PyLong_FromLong(ncpu);
 }
 
 
@@ -15951,6 +15992,7 @@ static PyMethodDef posix_methods[] = {
     OS__GETFULLPATHNAME_METHODDEF
     OS__GETDISKUSAGE_METHODDEF
     OS__GETFINALPATHNAME_METHODDEF
+    OS__FINDFIRSTFILE_METHODDEF
     OS__GETVOLUMEPATHNAME_METHODDEF
     OS__PATH_SPLITROOT_METHODDEF
     OS__PATH_NORMPATH_METHODDEF
diff --git a/Modules/signalmodule.c b/Modules/signalmodule.c
index 8d6556727b3a5a..ac3457003b0cb6 100644
--- a/Modules/signalmodule.c
+++ b/Modules/signalmodule.c
@@ -1767,9 +1767,8 @@ PyErr_CheckSignals(void)
        Python code to ensure signals are handled. Checking for the GC here
        allows long running native code to clean cycles created using the C-API
        even if it doesn't run the evaluation loop */
-    struct _ceval_state *interp_ceval_state = &tstate->interp->ceval;
-    if (_Py_atomic_load_relaxed(&interp_ceval_state->gc_scheduled)) {
-        _Py_atomic_store_relaxed(&interp_ceval_state->gc_scheduled, 0);
+    if (_Py_eval_breaker_bit_is_set(tstate->interp, _PY_GC_SCHEDULED_BIT)) {
+        _Py_set_eval_breaker_bit(tstate->interp, _PY_GC_SCHEDULED_BIT, 0);
         _Py_RunGC(tstate);
     }
 
diff --git a/Modules/termios.c b/Modules/termios.c
index c779a757e4fa9b..9fc2673ce0e788 100644
--- a/Modules/termios.c
+++ b/Modules/termios.c
@@ -120,7 +120,7 @@ termios_tcgetattr_impl(PyObject *module, int fd)
         v = PyBytes_FromStringAndSize(&ch, 1);
         if (v == NULL)
             goto err;
-        PyList_SetItem(cc, i, v);
+        PyList_SET_ITEM(cc, i, v);
     }
 
     /* Convert the MIN and TIME slots to integer.  On some systems, the
@@ -128,29 +128,44 @@ termios_tcgetattr_impl(PyObject *module, int fd)
        only do this in noncanonical input mode.  */
     if ((mode.c_lflag & ICANON) == 0) {
         v = PyLong_FromLong((long)mode.c_cc[VMIN]);
-        if (v == NULL)
+        if (v == NULL) {
+            goto err;
+        }
+        if (PyList_SetItem(cc, VMIN, v) < 0) {
             goto err;
-        PyList_SetItem(cc, VMIN, v);
+        }
         v = PyLong_FromLong((long)mode.c_cc[VTIME]);
-        if (v == NULL)
+        if (v == NULL) {
             goto err;
-        PyList_SetItem(cc, VTIME, v);
+        }
+        if (PyList_SetItem(cc, VTIME, v) < 0) {
+            goto err;
+        }
     }
 
-    if (!(v = PyList_New(7)))
-        goto err;
-
-    PyList_SetItem(v, 0, PyLong_FromLong((long)mode.c_iflag));
-    PyList_SetItem(v, 1, PyLong_FromLong((long)mode.c_oflag));
-    PyList_SetItem(v, 2, PyLong_FromLong((long)mode.c_cflag));
-    PyList_SetItem(v, 3, PyLong_FromLong((long)mode.c_lflag));
-    PyList_SetItem(v, 4, PyLong_FromLong((long)ispeed));
-    PyList_SetItem(v, 5, PyLong_FromLong((long)ospeed));
-    if (PyErr_Occurred()) {
-        Py_DECREF(v);
+    if (!(v = PyList_New(7))) {
         goto err;
     }
-    PyList_SetItem(v, 6, cc);
+
+#define ADD_LONG_ITEM(index, val) \
+    do { \
+        PyObject *l = PyLong_FromLong((long)val); \
+        if (l == NULL) { \
+            Py_DECREF(v); \
+            goto err; \
+        } \
+        PyList_SET_ITEM(v, index, l); \
+    } while (0)
+
+    ADD_LONG_ITEM(0, mode.c_iflag);
+    ADD_LONG_ITEM(1, mode.c_oflag);
+    ADD_LONG_ITEM(2, mode.c_cflag);
+    ADD_LONG_ITEM(3, mode.c_lflag);
+    ADD_LONG_ITEM(4, ispeed);
+    ADD_LONG_ITEM(5, ospeed);
+#undef ADD_LONG_ITEM
+
+    PyList_SET_ITEM(v, 6, cc);
     return v;
   err:
     Py_DECREF(cc);
@@ -197,17 +212,25 @@ termios_tcsetattr_impl(PyObject *module, int fd, int when, PyObject *term)
         return PyErr_SetFromErrno(state->TermiosError);
     }
 
-    mode.c_iflag = (tcflag_t) PyLong_AsLong(PyList_GetItem(term, 0));
-    mode.c_oflag = (tcflag_t) PyLong_AsLong(PyList_GetItem(term, 1));
-    mode.c_cflag = (tcflag_t) PyLong_AsLong(PyList_GetItem(term, 2));
-    mode.c_lflag = (tcflag_t) PyLong_AsLong(PyList_GetItem(term, 3));
-    speed_t ispeed = (speed_t) PyLong_AsLong(PyList_GetItem(term, 4));
-    speed_t ospeed = (speed_t) PyLong_AsLong(PyList_GetItem(term, 5));
-    PyObject *cc = PyList_GetItem(term, 6);
-    if (PyErr_Occurred()) {
-        return NULL;
-    }
-
+    speed_t ispeed, ospeed;
+#define SET_FROM_LIST(TYPE, VAR, LIST, N) do {  \
+    PyObject *item = PyList_GET_ITEM(LIST, N);  \
+    long num = PyLong_AsLong(item);             \
+    if (num == -1 && PyErr_Occurred()) {        \
+        return NULL;                            \
+    }                                           \
+    VAR = (TYPE)num;                            \
+} while (0)
+
+    SET_FROM_LIST(tcflag_t, mode.c_iflag, term, 0);
+    SET_FROM_LIST(tcflag_t, mode.c_oflag, term, 1);
+    SET_FROM_LIST(tcflag_t, mode.c_cflag, term, 2);
+    SET_FROM_LIST(tcflag_t, mode.c_lflag, term, 3);
+    SET_FROM_LIST(speed_t, ispeed, term, 4);
+    SET_FROM_LIST(speed_t, ospeed, term, 5);
+#undef SET_FROM_LIST
+
+    PyObject *cc = PyList_GET_ITEM(term, 6);
     if (!PyList_Check(cc) || PyList_Size(cc) != NCCS) {
         PyErr_Format(PyExc_TypeError,
             "tcsetattr: attributes[6] must be %d element list",
@@ -222,8 +245,13 @@ termios_tcsetattr_impl(PyObject *module, int fd, int when, PyObject *term)
 
         if (PyBytes_Check(v) && PyBytes_Size(v) == 1)
             mode.c_cc[i] = (cc_t) * PyBytes_AsString(v);
-        else if (PyLong_Check(v))
-            mode.c_cc[i] = (cc_t) PyLong_AsLong(v);
+        else if (PyLong_Check(v)) {
+            long num = PyLong_AsLong(v);
+            if (num == -1 && PyErr_Occurred()) {
+                return NULL;
+            }
+            mode.c_cc[i] = (cc_t)num;
+        }
         else {
             PyErr_SetString(PyExc_TypeError,
      "tcsetattr: elements of attributes must be characters or integers");
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 1fb795f5097897..361f8e93064b25 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -5649,7 +5649,7 @@ _PyObject_FreeInstanceAttributes(PyObject *self)
 }
 
 int
-_PyObject_VisitManagedDict(PyObject *obj, visitproc visit, void *arg)
+PyObject_VisitManagedDict(PyObject *obj, visitproc visit, void *arg)
 {
     PyTypeObject *tp = Py_TYPE(obj);
     if((tp->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0) {
@@ -5672,7 +5672,7 @@ _PyObject_VisitManagedDict(PyObject *obj, visitproc visit, void *arg)
 }
 
 void
-_PyObject_ClearManagedDict(PyObject *obj)
+PyObject_ClearManagedDict(PyObject *obj)
 {
     PyTypeObject *tp = Py_TYPE(obj);
     if((tp->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0) {
diff --git a/Objects/fileobject.c b/Objects/fileobject.c
index 066172baf9f027..5522eba34eace9 100644
--- a/Objects/fileobject.c
+++ b/Objects/fileobject.c
@@ -4,15 +4,19 @@
 #include "pycore_call.h"          // _PyObject_CallNoArgs()
 #include "pycore_runtime.h"       // _PyRuntime
 
+#ifdef HAVE_UNISTD_H
+#  include <unistd.h>             // isatty()
+#endif
+
 #if defined(HAVE_GETC_UNLOCKED) && !defined(_Py_MEMORY_SANITIZER)
-/* clang MemorySanitizer doesn't yet understand getc_unlocked. */
-#define GETC(f) getc_unlocked(f)
-#define FLOCKFILE(f) flockfile(f)
-#define FUNLOCKFILE(f) funlockfile(f)
+   /* clang MemorySanitizer doesn't yet understand getc_unlocked. */
+#  define GETC(f) getc_unlocked(f)
+#  define FLOCKFILE(f) flockfile(f)
+#  define FUNLOCKFILE(f) funlockfile(f)
 #else
-#define GETC(f) getc(f)
-#define FLOCKFILE(f)
-#define FUNLOCKFILE(f)
+#  define GETC(f) getc(f)
+#  define FLOCKFILE(f)
+#  define FUNLOCKFILE(f)
 #endif
 
 /* Newline flags */
diff --git a/Objects/structseq.c b/Objects/structseq.c
index 0ca622edc2ba37..e4a4b45a8db626 100644
--- a/Objects/structseq.c
+++ b/Objects/structseq.c
@@ -8,6 +8,7 @@
 */
 
 #include "Python.h"
+#include "pycore_dict.h"          // _PyDict_Pop()
 #include "pycore_tuple.h"         // _PyTuple_FromArray()
 #include "pycore_object.h"        // _PyObject_GC_TRACK()
 
@@ -216,19 +217,34 @@ structseq_new_impl(PyTypeObject *type, PyObject *arg, PyObject *dict)
         res->ob_item[i] = Py_NewRef(v);
     }
     Py_DECREF(arg);
-    for (; i < max_len; ++i) {
-        PyObject *ob = NULL;
-        if (dict != NULL) {
-            const char *name = type->tp_members[i-n_unnamed_fields].name;
+    if (dict != NULL && PyDict_GET_SIZE(dict) > 0) {
+        Py_ssize_t n_found_keys = 0;
+        for (i = len; i < max_len; ++i) {
+            PyObject *ob = NULL;
+            const char *name = type->tp_members[i - n_unnamed_fields].name;
             if (PyDict_GetItemStringRef(dict, name, &ob) < 0) {
                 Py_DECREF(res);
                 return NULL;
             }
+            if (ob == NULL) {
+                ob = Py_NewRef(Py_None);
+            }
+            else {
+                ++n_found_keys;
+            }
+            res->ob_item[i] = ob;
+        }
+        if (PyDict_GET_SIZE(dict) > n_found_keys) {
+            PyErr_Format(PyExc_TypeError,
+                         "%.500s() got duplicate or unexpected field name(s)",
+                         type->tp_name);
+            Py_DECREF(res);
+            return NULL;
         }
-        if (ob == NULL) {
-            ob = Py_NewRef(Py_None);
+    } else {
+        for (i = len; i < max_len; ++i) {
+            res->ob_item[i] = Py_NewRef(Py_None);
         }
-        res->ob_item[i] = ob;
     }
 
     _PyObject_GC_TRACK(res);
@@ -365,9 +381,82 @@ structseq_reduce(PyStructSequence* self, PyObject *Py_UNUSED(ignored))
     return NULL;
 }
 
+// __replace__(**changes): copy self, replacing the fields named in kwargs.
+static PyObject *
+structseq_replace(PyStructSequence *self, PyObject *args, PyObject *kwargs)
+{
+    PyStructSequence *result = NULL;
+    Py_ssize_t n_fields, n_unnamed_fields, i;
+
+    if (!_PyArg_NoPositional("__replace__", args)) {  // keyword args only
+        return NULL;
+    }
+
+    n_fields = REAL_SIZE(self);
+    if (n_fields < 0) {  // NOTE(review): presumably the macro failed - confirm
+        return NULL;
+    }
+    n_unnamed_fields = UNNAMED_FIELDS(self);
+    if (n_unnamed_fields < 0) {
+        return NULL;
+    }
+    if (n_unnamed_fields > 0) {
+        PyErr_Format(PyExc_TypeError,
+                     "__replace__() is not supported for %.500s "
+                     "because it has unnamed field(s)",
+                     Py_TYPE(self)->tp_name);
+        return NULL;
+    }
+
+    result = (PyStructSequence *) PyStructSequence_New(Py_TYPE(self));
+    if (!result) {
+        return NULL;
+    }
+
+    if (kwargs != NULL) {
+        // Types with unnamed fields were rejected above, so tp_members[i]
+        // names ob_item[i] directly, with no (i - n_unnamed_fields) offset.
+        for (i = 0; i < n_fields; ++i) {
+            PyObject *key = PyUnicode_FromString(Py_TYPE(self)->tp_members[i].name);
+            if (!key) {
+                goto error;
+            }
+            PyObject *ob = _PyDict_Pop(kwargs, key, self->ob_item[i]);
+            Py_DECREF(key);
+            if (!ob) {
+                goto error;
+            }
+            result->ob_item[i] = ob;
+        }
+        // Names matched above were popped; leftovers are unexpected fields.
+        if (PyDict_GET_SIZE(kwargs) > 0) {
+            PyObject *names = PyDict_Keys(kwargs);
+            if (names) {
+                PyErr_Format(PyExc_TypeError, "Got unexpected field name(s): %R", names);
+                Py_DECREF(names);
+            }
+            goto error;
+        }
+    }
+    else
+    {
+        // Just create a copy of the original.
+        for (i = 0; i < n_fields; ++i) {
+            result->ob_item[i] = Py_NewRef(self->ob_item[i]);
+        }
+    }
+
+    return (PyObject *)result;
+
+error:
+    Py_DECREF(result);
+    return NULL;
+}
+
 static PyMethodDef structseq_methods[] = {
     {"__reduce__", (PyCFunction)structseq_reduce, METH_NOARGS, NULL},
-    {NULL, NULL}
+    {"__replace__", _PyCFunction_CAST(structseq_replace), METH_VARARGS | METH_KEYWORDS, NULL},
+    {NULL, NULL}  // sentinel
 };
 
 static Py_ssize_t
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 893d8420bba4c4..3261a14a053dc8 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -1835,7 +1835,7 @@ subtype_traverse(PyObject *self, visitproc visit, void *arg)
         assert(base->tp_dictoffset == 0);
         if (type->tp_flags & Py_TPFLAGS_MANAGED_DICT) {
             assert(type->tp_dictoffset == -1);
-            int err = _PyObject_VisitManagedDict(self, visit, arg);
+            int err = PyObject_VisitManagedDict(self, visit, arg);
             if (err) {
                 return err;
             }
@@ -1905,7 +1905,7 @@ subtype_clear(PyObject *self)
        __dict__ slots (as in the case 'self.__dict__ is self'). */
     if (type->tp_flags & Py_TPFLAGS_MANAGED_DICT) {
         if ((base->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0) {
-            _PyObject_ClearManagedDict(self);
+            PyObject_ClearManagedDict(self);
         }
     }
     else if (type->tp_dictoffset != base->tp_dictoffset) {
diff --git a/Objects/typevarobject.c b/Objects/typevarobject.c
index 0f04523b0032ed..73cdf48788efe1 100644
--- a/Objects/typevarobject.c
+++ b/Objects/typevarobject.c
@@ -200,7 +200,7 @@ typevar_dealloc(PyObject *self)
     Py_XDECREF(tv->evaluate_bound);
     Py_XDECREF(tv->constraints);
     Py_XDECREF(tv->evaluate_constraints);
-    _PyObject_ClearManagedDict(self);
+    PyObject_ClearManagedDict(self);
     PyObject_ClearWeakRefs(self);
 
     Py_TYPE(self)->tp_free(self);
@@ -216,7 +216,7 @@ typevar_traverse(PyObject *self, visitproc visit, void *arg)
     Py_VISIT(tv->evaluate_bound);
     Py_VISIT(tv->constraints);
     Py_VISIT(tv->evaluate_constraints);
-    _PyObject_VisitManagedDict(self, visit, arg);
+    PyObject_VisitManagedDict(self, visit, arg);
     return 0;
 }
 
@@ -227,7 +227,7 @@ typevar_clear(typevarobject *self)
     Py_CLEAR(self->evaluate_bound);
     Py_CLEAR(self->constraints);
     Py_CLEAR(self->evaluate_constraints);
-    _PyObject_ClearManagedDict((PyObject *)self);
+    PyObject_ClearManagedDict((PyObject *)self);
     return 0;
 }
 
@@ -744,7 +744,7 @@ paramspec_dealloc(PyObject *self)
 
     Py_DECREF(ps->name);
     Py_XDECREF(ps->bound);
-    _PyObject_ClearManagedDict(self);
+    PyObject_ClearManagedDict(self);
     PyObject_ClearWeakRefs(self);
 
     Py_TYPE(self)->tp_free(self);
@@ -757,7 +757,7 @@ paramspec_traverse(PyObject *self, visitproc visit, void *arg)
     Py_VISIT(Py_TYPE(self));
     paramspecobject *ps = (paramspecobject *)self;
     Py_VISIT(ps->bound);
-    _PyObject_VisitManagedDict(self, visit, arg);
+    PyObject_VisitManagedDict(self, visit, arg);
     return 0;
 }
 
@@ -765,7 +765,7 @@ static int
 paramspec_clear(paramspecobject *self)
 {
     Py_CLEAR(self->bound);
-    _PyObject_ClearManagedDict((PyObject *)self);
+    PyObject_ClearManagedDict((PyObject *)self);
     return 0;
 }
 
@@ -1026,7 +1026,7 @@ typevartuple_dealloc(PyObject *self)
     typevartupleobject *tvt = (typevartupleobject *)self;
 
     Py_DECREF(tvt->name);
-    _PyObject_ClearManagedDict(self);
+    PyObject_ClearManagedDict(self);
     PyObject_ClearWeakRefs(self);
 
     Py_TYPE(self)->tp_free(self);
@@ -1165,14 +1165,14 @@ static int
 typevartuple_traverse(PyObject *self, visitproc visit, void *arg)
 {
     Py_VISIT(Py_TYPE(self));
-    _PyObject_VisitManagedDict(self, visit, arg);
+    PyObject_VisitManagedDict(self, visit, arg);
     return 0;
 }
 
 static int
 typevartuple_clear(PyObject *self)
 {
-    _PyObject_ClearManagedDict(self);
+    PyObject_ClearManagedDict(self);
     return 0;
 }
 
diff --git a/PC/config.c b/PC/config.c
index 88f69758aac764..da2bde640961e0 100644
--- a/PC/config.c
+++ b/PC/config.c
@@ -22,6 +22,7 @@ extern PyObject* PyInit__sha1(void);
 extern PyObject* PyInit__sha2(void);
 extern PyObject* PyInit__sha3(void);
 extern PyObject* PyInit__statistics(void);
+extern PyObject* PyInit__sysconfig(void);
 extern PyObject* PyInit__typing(void);
 extern PyObject* PyInit__blake2(void);
 extern PyObject* PyInit_time(void);
@@ -102,6 +103,7 @@ struct _inittab _PyImport_Inittab[] = {
     {"_sha2", PyInit__sha2},
     {"_sha3", PyInit__sha3},
     {"_blake2", PyInit__blake2},
+    {"_sysconfig", PyInit__sysconfig},
     {"time", PyInit_time},
     {"_thread", PyInit__thread},
     {"_tokenize", PyInit__tokenize},
diff --git a/PC/launcher2.c b/PC/launcher2.c
index bb500d4b6bfb07..116091f01227b8 100644
--- a/PC/launcher2.c
+++ b/PC/launcher2.c
@@ -195,6 +195,13 @@ join(wchar_t *buffer, size_t bufferLength, const wchar_t *fragment)
 }
 
 
+bool
+split_parent(wchar_t *buffer, size_t bufferLength)
+{
+    return SUCCEEDED(PathCchRemoveFileSpec(buffer, bufferLength));
+}
+
+
 int
 _compare(const wchar_t *x, int xLen, const wchar_t *y, int yLen)
 {
@@ -414,8 +421,8 @@ typedef struct {
     // if true, treats 'tag' as a non-PEP 514 filter
     bool oldStyleTag;
     // if true, ignores 'tag' when a high priority environment is found
-    // gh-92817: This is currently set when a tag is read from configuration or
-    // the environment, rather than the command line or a shebang line, and the
+    // gh-92817: This is currently set when a tag is read from configuration,
+    // the environment, or a shebang, rather than the command line, and the
     // only currently possible high priority environment is an active virtual
     // environment
     bool lowPriorityTag;
@@ -794,6 +801,8 @@ searchPath(SearchInfo *search, const wchar_t *shebang, int shebangLength)
         }
     }
 
+    debug(L"# Search PATH for %s\n", filename);
+
     wchar_t pathVariable[MAXLEN];
     int n = GetEnvironmentVariableW(L"PATH", pathVariable, MAXLEN);
     if (!n) {
@@ -1031,8 +1040,11 @@ checkShebang(SearchInfo *search)
     debug(L"Shebang: %s\n", shebang);
 
     // Handle shebangs that we should search PATH for
+    int executablePathWasSetByUsrBinEnv = 0;
     exitCode = searchPath(search, shebang, shebangLength);
-    if (exitCode != RC_NO_SHEBANG) {
+    if (exitCode == 0) {
+        executablePathWasSetByUsrBinEnv = 1;
+    } else if (exitCode != RC_NO_SHEBANG) {
         return exitCode;
     }
 
@@ -1067,7 +1079,7 @@ checkShebang(SearchInfo *search)
             search->tagLength = commandLength;
             // If we had 'python3.12.exe' then we want to strip the suffix
             // off of the tag
-            if (search->tagLength > 4) {
+            if (search->tagLength >= 4) {
                 const wchar_t *suffix = &search->tag[search->tagLength - 4];
                 if (0 == _comparePath(suffix, 4, L".exe", -1)) {
                     search->tagLength -= 4;
@@ -1075,13 +1087,14 @@ checkShebang(SearchInfo *search)
             }
             // If we had 'python3_d' then we want to strip the '_d' (any
             // '.exe' is already gone)
-            if (search->tagLength > 2) {
+            if (search->tagLength >= 2) {
                 const wchar_t *suffix = &search->tag[search->tagLength - 2];
                 if (0 == _comparePath(suffix, 2, L"_d", -1)) {
                     search->tagLength -= 2;
                 }
             }
             search->oldStyleTag = true;
+            search->lowPriorityTag = true;
             search->executableArgs = &command[commandLength];
             search->executableArgsLength = shebangLength - commandLength;
             if (search->tag && search->tagLength) {
@@ -1095,6 +1108,11 @@ checkShebang(SearchInfo *search)
         }
     }
 
+    // Didn't match a template, but we found it on PATH
+    if (executablePathWasSetByUsrBinEnv) {
+        return 0;
+    }
+
     // Unrecognised executables are first tried as command aliases
     commandLength = 0;
     while (commandLength < shebangLength && !isspace(shebang[commandLength])) {
@@ -1765,7 +1783,15 @@ virtualenvSearch(const SearchInfo *search, EnvironmentInfo **result)
         return 0;
     }
 
-    if (INVALID_FILE_ATTRIBUTES == GetFileAttributesW(buffer)) {
+    DWORD attr = GetFileAttributesW(buffer);
+    if (INVALID_FILE_ATTRIBUTES == attr && search->lowPriorityTag) {
+        if (!split_parent(buffer, MAXLEN) || !join(buffer, MAXLEN, L"python.exe")) {
+            return 0;
+        }
+        attr = GetFileAttributesW(buffer);
+    }
+
+    if (INVALID_FILE_ATTRIBUTES == attr) {
         debug(L"Python executable %s missing from virtual env\n", buffer);
         return 0;
     }
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index 1ec106777db56d..43a79fd5938486 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -240,6 +240,7 @@
     <ClInclude Include="..\Include\internal\pycore_hashtable.h" />
     <ClInclude Include="..\Include\internal\pycore_identifier.h" />
     <ClInclude Include="..\Include\internal\pycore_import.h" />
+    <ClInclude Include="..\Include\internal\pycore_importdl.h" />
     <ClInclude Include="..\Include\internal\pycore_initconfig.h" />
     <ClInclude Include="..\Include\internal\pycore_interp.h" />
     <ClInclude Include="..\Include\internal\pycore_intrinsics.h" />
@@ -367,7 +368,6 @@
     <ClInclude Include="..\PC\errmap.h" />
     <ClInclude Include="..\PC\pyconfig.h" />
     <ClInclude Include="..\Python\condvar.h" />
-    <ClInclude Include="..\Python\importdl.h" />
     <ClInclude Include="..\Python\stdlib_module_names.h" />
     <ClInclude Include="..\Python\thread_nt.h" />
   </ItemGroup>
@@ -438,6 +438,7 @@
     <ClCompile Include="..\Modules\signalmodule.c" />
     <ClCompile Include="..\Modules\_statisticsmodule.c" />
     <ClCompile Include="..\Modules\symtablemodule.c" />
+    <ClCompile Include="..\Modules\_sysconfig.c" />
     <ClCompile Include="..\Modules\_threadmodule.c" />
     <ClCompile Include="..\Modules\_tracemalloc.c" />
     <ClCompile Include="..\Modules\_typingmodule.c" />
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index f381120c9b035a..59159ed609968b 100644
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -300,9 +300,6 @@
     <ClInclude Include="..\PC\pyconfig.h">
       <Filter>PC</Filter>
     </ClInclude>
-    <ClInclude Include="..\Python\importdl.h">
-      <Filter>Python</Filter>
-    </ClInclude>
     <ClInclude Include="..\Python\stdlib_module_names.h">
       <Filter>Python</Filter>
     </ClInclude>
@@ -633,6 +630,9 @@
     <ClInclude Include="..\Include\internal\pycore_import.h">
       <Filter>Include\internal</Filter>
     </ClInclude>
+    <ClInclude Include="..\Include\internal\pycore_importdl.h">
+      <Filter>Include\internal</Filter>
+    </ClInclude>
     <ClInclude Include="..\Include\internal\pycore_initconfig.h">
       <Filter>Include\internal</Filter>
     </ClInclude>
@@ -959,6 +959,9 @@
     <ClCompile Include="..\Modules\symtablemodule.c">
       <Filter>Modules</Filter>
     </ClCompile>
+    <ClCompile Include="..\Modules\_sysconfig.c">
+      <Filter>Modules</Filter>
+    </ClCompile>
     <ClCompile Include="..\Modules\_threadmodule.c">
       <Filter>Modules</Filter>
     </ClCompile>
diff --git a/PCbuild/readme.txt b/PCbuild/readme.txt
index 199aacdf7687ed..98b37014907604 100644
--- a/PCbuild/readme.txt
+++ b/PCbuild/readme.txt
@@ -293,3 +293,31 @@ project, with some projects overriding certain specific values. The GUI
 doesn't always reflect the correct settings and may confuse the user
 with false information, especially for settings that automatically adapt
 for different configurations.
+
+Add a new project
+-----------------
+
+For example, to add a new _testclinic_limited project that builds the
+_testclinic_limited extension from Modules/_testclinic_limited.c:
+
+* In PCbuild/, copy _testclinic.vcxproj to _testclinic_limited.vcxproj,
+  replace RootNamespace value with `_testclinic_limited`, replace
+  `_testclinic.c` with `_testclinic_limited.c`.
+* Open Visual Studio, open PCbuild\pcbuild.sln solution, add the
+  PCbuild\_testclinic_limited.vcxproj project to the solution ("add existing
+  project").
+* Add a dependency on the python project to the new _testclinic_limited
+  project.
+* Save and exit Visual Studio.
+* Add `;_testclinic_limited` to `<TestModules Include="...">` in
+  PCbuild\pcbuild.proj.
+* Update "exts" in Tools\msi\lib\lib_files.wxs file or in
+  Tools\msi\test\test_files.wxs file (for tests).
+* PC\layout\main.py needs updating if you add a test-only extension whose name
+  doesn't start with "_test".
+* Add the extension to PCbuild\readme.txt (this file).
+* Build Python from scratch (clean the solution) to check that the new project
+  is built successfully.
+* Ensure the new .vcxproj and .vcxproj.filters files are added to your commit,
+  as well as the changes to pcbuild.sln, pcbuild.proj and any other modified
+  files.
diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c
index 36e0750220a30d..b8713a329d4ef6 100644
--- a/Parser/action_helpers.c
+++ b/Parser/action_helpers.c
@@ -998,18 +998,38 @@ _PyPegen_setup_full_format_spec(Parser *p, Token *colon, asdl_expr_seq *spec, in
         return NULL;
     }
 
-    // This is needed to keep compatibility with 3.11, where an empty format spec is parsed
-    // as an *empty* JoinedStr node, instead of having an empty constant in it.
-    if (asdl_seq_LEN(spec) == 1) {
-        expr_ty e = asdl_seq_GET(spec, 0);
-        if (e->kind == Constant_kind
-                && PyUnicode_Check(e->v.Constant.value)
-                && PyUnicode_GetLength(e->v.Constant.value) == 0) {
-            spec = _Py_asdl_expr_seq_new(0, arena);
+    // This is needed to keep compatibility with 3.11, where an empty format
+    // spec is parsed as an *empty* JoinedStr node, instead of having an empty
+    // constant in it.
+    Py_ssize_t n_items = asdl_seq_LEN(spec);
+    Py_ssize_t non_empty_count = 0;
+    for (Py_ssize_t i = 0; i < n_items; i++) {
+        expr_ty item = asdl_seq_GET(spec, i);
+        non_empty_count += !(item->kind == Constant_kind &&
+                             PyUnicode_CheckExact(item->v.Constant.value) &&
+                             PyUnicode_GET_LENGTH(item->v.Constant.value) == 0);
+    }
+    if (non_empty_count != n_items) {
+        asdl_expr_seq *resized_spec =
+            _Py_asdl_expr_seq_new(non_empty_count, p->arena);
+        if (resized_spec == NULL) {
+            return NULL;
+        }
+        Py_ssize_t j = 0;
+        for (Py_ssize_t i = 0; i < n_items; i++) {
+            expr_ty item = asdl_seq_GET(spec, i);
+            if (item->kind == Constant_kind &&
+                PyUnicode_CheckExact(item->v.Constant.value) &&
+                PyUnicode_GET_LENGTH(item->v.Constant.value) == 0) {
+                continue;
+            }
+            asdl_seq_SET(resized_spec, j++, item);
         }
+        assert(j == non_empty_count);
+        spec = resized_spec;
     }
-
-    expr_ty res = _PyAST_JoinedStr(spec, lineno, col_offset, end_lineno, end_col_offset, p->arena);
+    expr_ty res = _PyAST_JoinedStr(spec, lineno, col_offset, end_lineno,
+                                   end_col_offset, p->arena);
     if (!res) {
         return NULL;
     }
diff --git a/Parser/myreadline.c b/Parser/myreadline.c
index 815387388218c6..719a178f244a28 100644
--- a/Parser/myreadline.c
+++ b/Parser/myreadline.c
@@ -14,11 +14,15 @@
 #include "pycore_pystate.h"   // _PyThreadState_GET()
 #ifdef MS_WINDOWS
 #  ifndef WIN32_LEAN_AND_MEAN
-#  define WIN32_LEAN_AND_MEAN
+#    define WIN32_LEAN_AND_MEAN
 #  endif
 #  include "windows.h"
 #endif /* MS_WINDOWS */
 
+#ifdef HAVE_UNISTD_H
+#  include <unistd.h>             // isatty()
+#endif
+
 
 // Export the symbol since it's used by the readline shared extension
 PyAPI_DATA(PyThreadState*) _PyOS_ReadlineTState;
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 46b7159ff0516b..5e3816f59af35d 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -4,10 +4,12 @@
 #include "Python.h"
 #include "pycore_call.h"          // _PyObject_CallNoArgs()
 
-#include <assert.h>
+#include "tokenizer.h"            // struct tok_state
+#include "errcode.h"              // E_OK
 
-#include "tokenizer.h"
-#include "errcode.h"
+#ifdef HAVE_UNISTD_H
+#  include <unistd.h>             // read()
+#endif
 
 /* Alternate tab spacing */
 #define ALTTABSIZE 1
@@ -2688,11 +2690,28 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
         if (tok->done == E_ERROR) {
             return MAKE_TOKEN(ERRORTOKEN);
         }
-        if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) {
+        int in_format_spec = (
+                current_tok->last_expr_end != -1
+                &&
+                INSIDE_FSTRING_EXPR(current_tok)
+        );
+
+       if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) {
             if (tok->decoding_erred) {
                 return MAKE_TOKEN(ERRORTOKEN);
             }
 
+            // If we are in a format spec and we found a newline,
+            // it means that the format spec ends here and we should
+            // return to the regular mode.
+            if (in_format_spec && c == '\n') {
+                tok_backup(tok, c);
+                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
+                p_start = tok->start;
+                p_end = tok->cur;
+                return MAKE_TOKEN(FSTRING_MIDDLE);
+            }
+
             assert(tok->multi_line_start != NULL);
             // shift the tok_state's location into
             // the start of string, and report the error
@@ -2724,11 +2743,6 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
             end_quote_size = 0;
         }
 
-        int in_format_spec = (
-                current_tok->last_expr_end != -1
-                &&
-                INSIDE_FSTRING_EXPR(current_tok)
-        );
         if (c == '{') {
             int peek = tok_nextc(tok);
             if (peek != '{' || in_format_spec) {
diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h
index 61b1db9e5a1543..04fe07fad39937 100644
--- a/Python/abstract_interp_cases.c.h
+++ b/Python/abstract_interp_cases.c.h
@@ -474,6 +474,28 @@
             break;
         }
 
+        case _CHECK_ATTR_MODULE: {
+            break;
+        }
+
+        case _LOAD_ATTR_MODULE: {
+            STACK_GROW(((oparg & 1) ? 1 : 0));
+            PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true);
+            PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-(oparg & 1 ? 1 : 0))), true);
+            break;
+        }
+
+        case _CHECK_ATTR_WITH_HINT: {
+            break;
+        }
+
+        case _LOAD_ATTR_WITH_HINT: {
+            STACK_GROW(((oparg & 1) ? 1 : 0));
+            PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true);
+            PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-(oparg & 1 ? 1 : 0))), true);
+            break;
+        }
+
         case _LOAD_ATTR_SLOT: {
             STACK_GROW(((oparg & 1) ? 1 : 0));
             PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true);
@@ -481,16 +503,23 @@
             break;
         }
 
-        case _GUARD_DORV_VALUES: {
+        case _CHECK_ATTR_CLASS: {
             break;
         }
 
-        case _STORE_ATTR_INSTANCE_VALUE: {
-            STACK_SHRINK(2);
+        case _LOAD_ATTR_CLASS: {
+            STACK_GROW(((oparg & 1) ? 1 : 0));
+            PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true);
+            PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-(oparg & 1 ? 1 : 0))), true);
             break;
         }
 
-        case _GUARD_TYPE_VERSION_STORE: {
+        case _GUARD_DORV_VALUES: {
+            break;
+        }
+
+        case _STORE_ATTR_INSTANCE_VALUE: {
+            STACK_SHRINK(2);
             break;
         }
 
@@ -674,6 +703,29 @@
             break;
         }
 
+        case _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: {
+            PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true);
+            PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(0)), true);
+            break;
+        }
+
+        case _LOAD_ATTR_NONDESCRIPTOR_NO_DICT: {
+            PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true);
+            PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(0)), true);
+            break;
+        }
+
+        case _CHECK_ATTR_METHOD_LAZY_DICT: {
+            break;
+        }
+
+        case _LOAD_ATTR_METHOD_LAZY_DICT: {
+            STACK_GROW(1);
+            PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-2)), true);
+            PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true);
+            break;
+        }
+
         case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: {
             break;
         }
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index 69056bf23f4058..c373585c0986ce 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -17,6 +17,11 @@
 
 #include "clinic/bltinmodule.c.h"
 
+#ifdef HAVE_UNISTD_H
+#  include <unistd.h>             // isatty()
+#endif
+
+
 static PyObject*
 update_bases(PyObject *bases, PyObject *const *args, Py_ssize_t nargs)
 {
diff --git a/Python/bootstrap_hash.c b/Python/bootstrap_hash.c
index ef693e5df1fcc4..92f2301a012c0a 100644
--- a/Python/bootstrap_hash.c
+++ b/Python/bootstrap_hash.c
@@ -4,22 +4,25 @@
 #include "pycore_pylifecycle.h"   // _PyOS_URandomNonblock()
 #include "pycore_runtime.h"       // _PyRuntime
 
+#ifdef HAVE_UNISTD_H
+#  include <unistd.h>             // close()
+#endif
 #ifdef MS_WINDOWS
 #  include <windows.h>
 #  include <bcrypt.h>
 #else
-#  include <fcntl.h>
+#  include <fcntl.h>              // O_RDONLY
 #  ifdef HAVE_SYS_STAT_H
 #    include <sys/stat.h>
 #  endif
 #  ifdef HAVE_LINUX_RANDOM_H
-#    include <linux/random.h>
+#    include <linux/random.h>     // GRND_NONBLOCK
 #  endif
 #  if defined(HAVE_SYS_RANDOM_H) && (defined(HAVE_GETRANDOM) || defined(HAVE_GETENTROPY))
-#    include <sys/random.h>
+#    include <sys/random.h>       // getrandom()
 #  endif
 #  if !defined(HAVE_GETRANDOM) && defined(HAVE_GETRANDOM_SYSCALL)
-#    include <sys/syscall.h>
+#    include <sys/syscall.h>      // SYS_getrandom
 #  endif
 #endif
 
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index f7681bd234a43f..9b733ce4a8c14b 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -41,8 +41,6 @@
 #include "ceval_macros.h"
 
 /* Flow control macros */
-#define DEOPT_IF(cond, instname) ((void)0)
-#define ERROR_IF(cond, labelname) ((void)0)
 #define GO_TO_INSTRUCTION(instname) ((void)0)
 
 #define inst(name, ...) case name:
@@ -138,7 +136,12 @@ dummy_func(
         inst(RESUME, (--)) {
             TIER_ONE_ONLY
             assert(frame == tstate->current_frame);
-            if (_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version) {
+            uintptr_t global_version =
+                _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) &
+                ~_PY_EVAL_EVENTS_MASK;
+            uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
+            assert((code_version & 255) == 0);
+            if (code_version != global_version) {
                 int err = _Py_Instrument(_PyFrame_GetCode(frame), tstate->interp);
                 ERROR_IF(err, error);
                 next_instr--;
@@ -153,21 +156,19 @@ dummy_func(
 
         inst(RESUME_CHECK, (--)) {
 #if defined(__EMSCRIPTEN__)
-            DEOPT_IF(_Py_emscripten_signal_clock == 0, RESUME);
+            DEOPT_IF(_Py_emscripten_signal_clock == 0);
             _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING;
 #endif
-            /* Possibly combine these two checks */
-            DEOPT_IF(_PyFrame_GetCode(frame)->_co_instrumentation_version
-                != tstate->interp->monitoring_version, RESUME);
-            DEOPT_IF(_Py_atomic_load_relaxed_int32(&tstate->interp->ceval.eval_breaker), RESUME);
+            uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker);
+            uintptr_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
+            assert((version & _PY_EVAL_EVENTS_MASK) == 0);
+            DEOPT_IF(eval_breaker != version);
         }
 
         inst(INSTRUMENTED_RESUME, (--)) {
-            /* Possible performance enhancement:
-             *   We need to check the eval breaker anyway, can we
-             * combine the instrument verison check and the eval breaker test?
-             */
-            if (_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version) {
+            uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & ~_PY_EVAL_EVENTS_MASK;
+            uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
+            if (code_version != global_version) {
                 if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) {
                     goto error;
                 }
@@ -325,12 +326,12 @@ dummy_func(
         }
 
         inst(TO_BOOL_BOOL, (unused/1, unused/2, value -- value)) {
-            DEOPT_IF(!PyBool_Check(value), TO_BOOL);
+            DEOPT_IF(!PyBool_Check(value));
             STAT_INC(TO_BOOL, hit);
         }
 
         inst(TO_BOOL_INT, (unused/1, unused/2, value -- res)) {
-            DEOPT_IF(!PyLong_CheckExact(value), TO_BOOL);
+            DEOPT_IF(!PyLong_CheckExact(value));
             STAT_INC(TO_BOOL, hit);
             if (_PyLong_IsZero((PyLongObject *)value)) {
                 assert(_Py_IsImmortal(value));
@@ -343,7 +344,7 @@ dummy_func(
         }
 
         inst(TO_BOOL_LIST, (unused/1, unused/2, value -- res)) {
-            DEOPT_IF(!PyList_CheckExact(value), TO_BOOL);
+            DEOPT_IF(!PyList_CheckExact(value));
             STAT_INC(TO_BOOL, hit);
             res = Py_SIZE(value) ? Py_True : Py_False;
             DECREF_INPUTS();
@@ -351,13 +352,13 @@ dummy_func(
 
         inst(TO_BOOL_NONE, (unused/1, unused/2, value -- res)) {
             // This one is a bit weird, because we expect *some* failures:
-            DEOPT_IF(!Py_IsNone(value), TO_BOOL);
+            DEOPT_IF(!Py_IsNone(value));
             STAT_INC(TO_BOOL, hit);
             res = Py_False;
         }
 
         inst(TO_BOOL_STR, (unused/1, unused/2, value -- res)) {
-            DEOPT_IF(!PyUnicode_CheckExact(value), TO_BOOL);
+            DEOPT_IF(!PyUnicode_CheckExact(value));
             STAT_INC(TO_BOOL, hit);
             if (value == &_Py_STR(empty)) {
                 assert(_Py_IsImmortal(value));
@@ -373,7 +374,7 @@ dummy_func(
         inst(TO_BOOL_ALWAYS_TRUE, (unused/1, version/2, value -- res)) {
             // This one is a bit weird, because we expect *some* failures:
             assert(version);
-            DEOPT_IF(Py_TYPE(value)->tp_version_tag != version, TO_BOOL);
+            DEOPT_IF(Py_TYPE(value)->tp_version_tag != version);
             STAT_INC(TO_BOOL, hit);
             DECREF_INPUTS();
             res = Py_True;
@@ -397,8 +398,8 @@ dummy_func(
         };
 
         op(_GUARD_BOTH_INT, (left, right -- left, right)) {
-            DEOPT_IF(!PyLong_CheckExact(left), BINARY_OP);
-            DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP);
+            DEOPT_IF(!PyLong_CheckExact(left));
+            DEOPT_IF(!PyLong_CheckExact(right));
         }
 
         op(_BINARY_OP_MULTIPLY_INT, (unused/1, left, right -- res)) {
@@ -433,8 +434,8 @@ dummy_func(
             _GUARD_BOTH_INT + _BINARY_OP_SUBTRACT_INT;
 
         op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) {
-            DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP);
-            DEOPT_IF(!PyFloat_CheckExact(right), BINARY_OP);
+            DEOPT_IF(!PyFloat_CheckExact(left));
+            DEOPT_IF(!PyFloat_CheckExact(right));
         }
 
         op(_BINARY_OP_MULTIPLY_FLOAT, (unused/1, left, right -- res)) {
@@ -469,8 +470,8 @@ dummy_func(
             _GUARD_BOTH_FLOAT + _BINARY_OP_SUBTRACT_FLOAT;
 
         op(_GUARD_BOTH_UNICODE, (left, right -- left, right)) {
-            DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_OP);
-            DEOPT_IF(!PyUnicode_CheckExact(right), BINARY_OP);
+            DEOPT_IF(!PyUnicode_CheckExact(left));
+            DEOPT_IF(!PyUnicode_CheckExact(right));
         }
 
         op(_BINARY_OP_ADD_UNICODE, (unused/1, left, right -- res)) {
@@ -494,7 +495,7 @@ dummy_func(
             _Py_CODEUNIT true_next = next_instr[INLINE_CACHE_ENTRIES_BINARY_OP];
             assert(true_next.op.code == STORE_FAST);
             PyObject **target_local = &GETLOCAL(true_next.op.arg);
-            DEOPT_IF(*target_local != left, BINARY_OP);
+            DEOPT_IF(*target_local != left);
             STAT_INC(BINARY_OP, hit);
             /* Handle `left = left + right` or `left += right` for str.
              *
@@ -574,13 +575,13 @@ dummy_func(
         }
 
         inst(BINARY_SUBSCR_LIST_INT, (unused/1, list, sub -- res)) {
-            DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR);
-            DEOPT_IF(!PyList_CheckExact(list), BINARY_SUBSCR);
+            DEOPT_IF(!PyLong_CheckExact(sub));
+            DEOPT_IF(!PyList_CheckExact(list));
 
             // Deopt unless 0 <= sub < PyList_Size(list)
-            DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub), BINARY_SUBSCR);
+            DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub));
             Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0];
-            DEOPT_IF(index >= PyList_GET_SIZE(list), BINARY_SUBSCR);
+            DEOPT_IF(index >= PyList_GET_SIZE(list));
             STAT_INC(BINARY_SUBSCR, hit);
             res = PyList_GET_ITEM(list, index);
             assert(res != NULL);
@@ -590,14 +591,14 @@ dummy_func(
         }
 
         inst(BINARY_SUBSCR_STR_INT, (unused/1, str, sub -- res)) {
-            DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR);
-            DEOPT_IF(!PyUnicode_CheckExact(str), BINARY_SUBSCR);
-            DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub), BINARY_SUBSCR);
+            DEOPT_IF(!PyLong_CheckExact(sub));
+            DEOPT_IF(!PyUnicode_CheckExact(str));
+            DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub));
             Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0];
-            DEOPT_IF(PyUnicode_GET_LENGTH(str) <= index, BINARY_SUBSCR);
+            DEOPT_IF(PyUnicode_GET_LENGTH(str) <= index);
             // Specialize for reading an ASCII character from any string:
             Py_UCS4 c = PyUnicode_READ_CHAR(str, index);
-            DEOPT_IF(Py_ARRAY_LENGTH(_Py_SINGLETON(strings).ascii) <= c, BINARY_SUBSCR);
+            DEOPT_IF(Py_ARRAY_LENGTH(_Py_SINGLETON(strings).ascii) <= c);
             STAT_INC(BINARY_SUBSCR, hit);
             res = (PyObject*)&_Py_SINGLETON(strings).ascii[c];
             _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free);
@@ -605,13 +606,13 @@ dummy_func(
         }
 
         inst(BINARY_SUBSCR_TUPLE_INT, (unused/1, tuple, sub -- res)) {
-            DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR);
-            DEOPT_IF(!PyTuple_CheckExact(tuple), BINARY_SUBSCR);
+            DEOPT_IF(!PyLong_CheckExact(sub));
+            DEOPT_IF(!PyTuple_CheckExact(tuple));
 
             // Deopt unless 0 <= sub < PyTuple_Size(list)
-            DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub), BINARY_SUBSCR);
+            DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub));
             Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0];
-            DEOPT_IF(index >= PyTuple_GET_SIZE(tuple), BINARY_SUBSCR);
+            DEOPT_IF(index >= PyTuple_GET_SIZE(tuple));
             STAT_INC(BINARY_SUBSCR, hit);
             res = PyTuple_GET_ITEM(tuple, index);
             assert(res != NULL);
@@ -621,7 +622,7 @@ dummy_func(
         }
 
         inst(BINARY_SUBSCR_DICT, (unused/1, dict, sub -- res)) {
-            DEOPT_IF(!PyDict_CheckExact(dict), BINARY_SUBSCR);
+            DEOPT_IF(!PyDict_CheckExact(dict));
             STAT_INC(BINARY_SUBSCR, hit);
             res = PyDict_GetItemWithError(dict, sub);
             if (res == NULL) {
@@ -636,19 +637,19 @@ dummy_func(
         }
 
         inst(BINARY_SUBSCR_GETITEM, (unused/1, container, sub -- unused)) {
-            DEOPT_IF(tstate->interp->eval_frame, BINARY_SUBSCR);
+            DEOPT_IF(tstate->interp->eval_frame);
             PyTypeObject *tp = Py_TYPE(container);
-            DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE), BINARY_SUBSCR);
+            DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE));
             PyHeapTypeObject *ht = (PyHeapTypeObject *)tp;
             PyObject *cached = ht->_spec_cache.getitem;
-            DEOPT_IF(cached == NULL, BINARY_SUBSCR);
+            DEOPT_IF(cached == NULL);
             assert(PyFunction_Check(cached));
             PyFunctionObject *getitem = (PyFunctionObject *)cached;
             uint32_t cached_version = ht->_spec_cache.getitem_version;
-            DEOPT_IF(getitem->func_version != cached_version, BINARY_SUBSCR);
+            DEOPT_IF(getitem->func_version != cached_version);
             PyCodeObject *code = (PyCodeObject *)getitem->func_code;
             assert(code->co_argcount == 2);
-            DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), BINARY_SUBSCR);
+            DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize));
             STAT_INC(BINARY_SUBSCR, hit);
             Py_INCREF(getitem);
             _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2);
@@ -693,14 +694,14 @@ dummy_func(
         }
 
         inst(STORE_SUBSCR_LIST_INT, (unused/1, value, list, sub -- )) {
-            DEOPT_IF(!PyLong_CheckExact(sub), STORE_SUBSCR);
-            DEOPT_IF(!PyList_CheckExact(list), STORE_SUBSCR);
+            DEOPT_IF(!PyLong_CheckExact(sub));
+            DEOPT_IF(!PyList_CheckExact(list));
 
             // Ensure nonnegative, zero-or-one-digit ints.
-            DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub), STORE_SUBSCR);
+            DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub));
             Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0];
             // Ensure index < len(list)
-            DEOPT_IF(index >= PyList_GET_SIZE(list), STORE_SUBSCR);
+            DEOPT_IF(index >= PyList_GET_SIZE(list));
             STAT_INC(STORE_SUBSCR, hit);
 
             PyObject *old_value = PyList_GET_ITEM(list, index);
@@ -712,7 +713,7 @@ dummy_func(
         }
 
         inst(STORE_SUBSCR_DICT, (unused/1, value, dict, sub -- )) {
-            DEOPT_IF(!PyDict_CheckExact(dict), STORE_SUBSCR);
+            DEOPT_IF(!PyDict_CheckExact(dict));
             STAT_INC(STORE_SUBSCR, hit);
             int err = _PyDict_SetItem_Take2((PyDictObject *)dict, sub, value);
             Py_DECREF(dict);
@@ -1009,11 +1010,10 @@ dummy_func(
         }
 
         inst(SEND_GEN, (unused/1, receiver, v -- receiver, unused)) {
-            DEOPT_IF(tstate->interp->eval_frame, SEND);
+            DEOPT_IF(tstate->interp->eval_frame);
             PyGenObject *gen = (PyGenObject *)receiver;
-            DEOPT_IF(Py_TYPE(gen) != &PyGen_Type &&
-                     Py_TYPE(gen) != &PyCoro_Type, SEND);
-            DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, SEND);
+            DEOPT_IF(Py_TYPE(gen) != &PyGen_Type && Py_TYPE(gen) != &PyCoro_Type);
+            DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING);
             STAT_INC(SEND, hit);
             _PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
             STACK_SHRINK(1);
@@ -1194,8 +1194,8 @@ dummy_func(
         }
 
         inst(UNPACK_SEQUENCE_TWO_TUPLE, (unused/1, seq -- values[oparg])) {
-            DEOPT_IF(!PyTuple_CheckExact(seq), UNPACK_SEQUENCE);
-            DEOPT_IF(PyTuple_GET_SIZE(seq) != 2, UNPACK_SEQUENCE);
+            DEOPT_IF(!PyTuple_CheckExact(seq));
+            DEOPT_IF(PyTuple_GET_SIZE(seq) != 2);
             assert(oparg == 2);
             STAT_INC(UNPACK_SEQUENCE, hit);
             values[0] = Py_NewRef(PyTuple_GET_ITEM(seq, 1));
@@ -1204,8 +1204,8 @@ dummy_func(
         }
 
         inst(UNPACK_SEQUENCE_TUPLE, (unused/1, seq -- values[oparg])) {
-            DEOPT_IF(!PyTuple_CheckExact(seq), UNPACK_SEQUENCE);
-            DEOPT_IF(PyTuple_GET_SIZE(seq) != oparg, UNPACK_SEQUENCE);
+            DEOPT_IF(!PyTuple_CheckExact(seq));
+            DEOPT_IF(PyTuple_GET_SIZE(seq) != oparg);
             STAT_INC(UNPACK_SEQUENCE, hit);
             PyObject **items = _PyTuple_ITEMS(seq);
             for (int i = oparg; --i >= 0; ) {
@@ -1215,8 +1215,8 @@ dummy_func(
         }
 
         inst(UNPACK_SEQUENCE_LIST, (unused/1, seq -- values[oparg])) {
-            DEOPT_IF(!PyList_CheckExact(seq), UNPACK_SEQUENCE);
-            DEOPT_IF(PyList_GET_SIZE(seq) != oparg, UNPACK_SEQUENCE);
+            DEOPT_IF(!PyList_CheckExact(seq));
+            DEOPT_IF(PyList_GET_SIZE(seq) != oparg);
             STAT_INC(UNPACK_SEQUENCE, hit);
             PyObject **items = _PyList_ITEMS(seq);
             for (int i = oparg; --i >= 0; ) {
@@ -1412,15 +1412,15 @@ dummy_func(
 
         op(_GUARD_GLOBALS_VERSION, (version/1 --)) {
             PyDictObject *dict = (PyDictObject *)GLOBALS();
-            DEOPT_IF(!PyDict_CheckExact(dict), LOAD_GLOBAL);
-            DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL);
+            DEOPT_IF(!PyDict_CheckExact(dict));
+            DEOPT_IF(dict->ma_keys->dk_version != version);
             assert(DK_IS_UNICODE(dict->ma_keys));
         }
 
         op(_GUARD_BUILTINS_VERSION, (version/1 --)) {
             PyDictObject *dict = (PyDictObject *)BUILTINS();
-            DEOPT_IF(!PyDict_CheckExact(dict), LOAD_GLOBAL);
-            DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL);
+            DEOPT_IF(!PyDict_CheckExact(dict));
+            DEOPT_IF(dict->ma_keys->dk_version != version);
             assert(DK_IS_UNICODE(dict->ma_keys));
         }
 
@@ -1428,7 +1428,7 @@ dummy_func(
             PyDictObject *dict = (PyDictObject *)GLOBALS();
             PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys);
             res = entries[index].me_value;
-            DEOPT_IF(res == NULL, LOAD_GLOBAL);
+            DEOPT_IF(res == NULL);
             Py_INCREF(res);
             STAT_INC(LOAD_GLOBAL, hit);
             null = NULL;
@@ -1438,7 +1438,7 @@ dummy_func(
             PyDictObject *bdict = (PyDictObject *)BUILTINS();
             PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(bdict->ma_keys);
             res = entries[index].me_value;
-            DEOPT_IF(res == NULL, LOAD_GLOBAL);
+            DEOPT_IF(res == NULL);
             Py_INCREF(res);
             STAT_INC(LOAD_GLOBAL, hit);
             null = NULL;
@@ -1763,8 +1763,8 @@ dummy_func(
 
         inst(LOAD_SUPER_ATTR_ATTR, (unused/1, global_super, class, self -- attr, unused if (0))) {
             assert(!(oparg & 1));
-            DEOPT_IF(global_super != (PyObject *)&PySuper_Type, LOAD_SUPER_ATTR);
-            DEOPT_IF(!PyType_Check(class), LOAD_SUPER_ATTR);
+            DEOPT_IF(global_super != (PyObject *)&PySuper_Type);
+            DEOPT_IF(!PyType_Check(class));
             STAT_INC(LOAD_SUPER_ATTR, hit);
             PyObject *name = GETITEM(FRAME_CO_NAMES, oparg >> 2);
             attr = _PySuper_Lookup((PyTypeObject *)class, self, name, NULL);
@@ -1774,8 +1774,8 @@ dummy_func(
 
         inst(LOAD_SUPER_ATTR_METHOD, (unused/1, global_super, class, self -- attr, self_or_null)) {
             assert(oparg & 1);
-            DEOPT_IF(global_super != (PyObject *)&PySuper_Type, LOAD_SUPER_ATTR);
-            DEOPT_IF(!PyType_Check(class), LOAD_SUPER_ATTR);
+            DEOPT_IF(global_super != (PyObject *)&PySuper_Type);
+            DEOPT_IF(!PyType_Check(class));
             STAT_INC(LOAD_SUPER_ATTR, hit);
             PyObject *name = GETITEM(FRAME_CO_NAMES, oparg >> 2);
             PyTypeObject *cls = (PyTypeObject *)class;
@@ -1864,22 +1864,20 @@ dummy_func(
         op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) {
             PyTypeObject *tp = Py_TYPE(owner);
             assert(type_version != 0);
-            DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR);
+            DEOPT_IF(tp->tp_version_tag != type_version);
         }
 
         op(_CHECK_MANAGED_OBJECT_HAS_VALUES, (owner -- owner)) {
             assert(Py_TYPE(owner)->tp_dictoffset < 0);
             assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
             PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner);
-            DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) &&
-                     !_PyObject_MakeInstanceAttributesFromDict(owner, dorv),
-                     LOAD_ATTR);
+            DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) && !_PyObject_MakeInstanceAttributesFromDict(owner, dorv));
         }
 
         op(_LOAD_ATTR_INSTANCE_VALUE, (index/1, owner -- attr, null if (oparg & 1))) {
             PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
             attr = _PyDictOrValues_GetValues(dorv)->values[index];
-            DEOPT_IF(attr == NULL, LOAD_ATTR);
+            DEOPT_IF(attr == NULL);
             STAT_INC(LOAD_ATTR, hit);
             Py_INCREF(attr);
             null = NULL;
@@ -1893,56 +1891,74 @@ dummy_func(
             _LOAD_ATTR_INSTANCE_VALUE +
             unused/5;  // Skip over rest of cache
 
-        inst(LOAD_ATTR_MODULE, (unused/1, type_version/2, index/1, unused/5, owner -- attr, null if (oparg & 1))) {
-            DEOPT_IF(!PyModule_CheckExact(owner), LOAD_ATTR);
+        op(_CHECK_ATTR_MODULE, (type_version/2, owner -- owner)) {
+            DEOPT_IF(!PyModule_CheckExact(owner));
             PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner)->md_dict;
             assert(dict != NULL);
-            DEOPT_IF(dict->ma_keys->dk_version != type_version, LOAD_ATTR);
+            DEOPT_IF(dict->ma_keys->dk_version != type_version);
+        }
+
+        op(_LOAD_ATTR_MODULE, (index/1, owner -- attr, null if (oparg & 1))) {
+            PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner)->md_dict;
             assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE);
             assert(index < dict->ma_keys->dk_nentries);
             PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + index;
             attr = ep->me_value;
-            DEOPT_IF(attr == NULL, LOAD_ATTR);
+            DEOPT_IF(attr == NULL);
             STAT_INC(LOAD_ATTR, hit);
             Py_INCREF(attr);
             null = NULL;
             DECREF_INPUTS();
         }
 
-        inst(LOAD_ATTR_WITH_HINT, (unused/1, type_version/2, index/1, unused/5, owner -- attr, null if (oparg & 1))) {
-            PyTypeObject *tp = Py_TYPE(owner);
-            assert(type_version != 0);
-            DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR);
-            assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT);
+        macro(LOAD_ATTR_MODULE) =
+            unused/1 +
+            _CHECK_ATTR_MODULE +
+            _LOAD_ATTR_MODULE +
+            unused/5;
+
+        op(_CHECK_ATTR_WITH_HINT, (owner -- owner)) {
+            assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
             PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
-            DEOPT_IF(_PyDictOrValues_IsValues(dorv), LOAD_ATTR);
+            DEOPT_IF(_PyDictOrValues_IsValues(dorv));
             PyDictObject *dict = (PyDictObject *)_PyDictOrValues_GetDict(dorv);
-            DEOPT_IF(dict == NULL, LOAD_ATTR);
+            DEOPT_IF(dict == NULL);
             assert(PyDict_CheckExact((PyObject *)dict));
+        }
+
+        op(_LOAD_ATTR_WITH_HINT, (hint/1, owner -- attr, null if (oparg & 1))) {
+            PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
+            PyDictObject *dict = (PyDictObject *)_PyDictOrValues_GetDict(dorv);
+            DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries);
             PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1);
-            uint16_t hint = index;
-            DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries, LOAD_ATTR);
             if (DK_IS_UNICODE(dict->ma_keys)) {
                 PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint;
-                DEOPT_IF(ep->me_key != name, LOAD_ATTR);
+                DEOPT_IF(ep->me_key != name);
                 attr = ep->me_value;
             }
             else {
                 PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + hint;
-                DEOPT_IF(ep->me_key != name, LOAD_ATTR);
+                DEOPT_IF(ep->me_key != name);
                 attr = ep->me_value;
             }
-            DEOPT_IF(attr == NULL, LOAD_ATTR);
+            DEOPT_IF(attr == NULL);
             STAT_INC(LOAD_ATTR, hit);
             Py_INCREF(attr);
             null = NULL;
             DECREF_INPUTS();
         }
 
+        macro(LOAD_ATTR_WITH_HINT) =
+            unused/1 +
+            _GUARD_TYPE_VERSION +
+            _CHECK_ATTR_WITH_HINT +
+            _LOAD_ATTR_WITH_HINT +
+            unused/5;
+
         op(_LOAD_ATTR_SLOT, (index/1, owner -- attr, null if (oparg & 1))) {
             char *addr = (char *)owner + index;
             attr = *(PyObject **)addr;
-            DEOPT_IF(attr == NULL, LOAD_ATTR);
+            DEOPT_IF(attr == NULL);
             STAT_INC(LOAD_ATTR, hit);
             Py_INCREF(attr);
             null = NULL;
@@ -1955,35 +1971,41 @@ dummy_func(
             _LOAD_ATTR_SLOT +  // NOTE: This action may also deopt
             unused/5;
 
-        inst(LOAD_ATTR_CLASS, (unused/1, type_version/2, unused/2, descr/4, owner -- attr, null if (oparg & 1))) {
-
-            DEOPT_IF(!PyType_Check(owner), LOAD_ATTR);
-            DEOPT_IF(((PyTypeObject *)owner)->tp_version_tag != type_version,
-                LOAD_ATTR);
+        op(_CHECK_ATTR_CLASS, (type_version/2, owner -- owner)) {
+            DEOPT_IF(!PyType_Check(owner));
             assert(type_version != 0);
+            DEOPT_IF(((PyTypeObject *)owner)->tp_version_tag != type_version);
+
+        }
 
+        op(_LOAD_ATTR_CLASS, (descr/4, owner -- attr, null if (oparg & 1))) {
             STAT_INC(LOAD_ATTR, hit);
+            assert(descr != NULL);
+            attr = Py_NewRef(descr);
             null = NULL;
-            attr = descr;
-            assert(attr != NULL);
-            Py_INCREF(attr);
             DECREF_INPUTS();
         }
 
+        macro(LOAD_ATTR_CLASS) =
+            unused/1 +
+            _CHECK_ATTR_CLASS +
+            unused/2 +
+            _LOAD_ATTR_CLASS;
+
         inst(LOAD_ATTR_PROPERTY, (unused/1, type_version/2, func_version/2, fget/4, owner -- unused, unused if (0))) {
             assert((oparg & 1) == 0);
-            DEOPT_IF(tstate->interp->eval_frame, LOAD_ATTR);
+            DEOPT_IF(tstate->interp->eval_frame);
 
             PyTypeObject *cls = Py_TYPE(owner);
-            DEOPT_IF(cls->tp_version_tag != type_version, LOAD_ATTR);
+            DEOPT_IF(cls->tp_version_tag != type_version);
             assert(type_version != 0);
             assert(Py_IS_TYPE(fget, &PyFunction_Type));
             PyFunctionObject *f = (PyFunctionObject *)fget;
             assert(func_version != 0);
-            DEOPT_IF(f->func_version != func_version, LOAD_ATTR);
+            DEOPT_IF(f->func_version != func_version);
             PyCodeObject *code = (PyCodeObject *)f->func_code;
             assert(code->co_argcount == 1);
-            DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), LOAD_ATTR);
+            DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize));
             STAT_INC(LOAD_ATTR, hit);
             Py_INCREF(fget);
             _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, f, 1);
@@ -1997,17 +2019,17 @@ dummy_func(
 
         inst(LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN, (unused/1, type_version/2, func_version/2, getattribute/4, owner -- unused, unused if (0))) {
             assert((oparg & 1) == 0);
-            DEOPT_IF(tstate->interp->eval_frame, LOAD_ATTR);
+            DEOPT_IF(tstate->interp->eval_frame);
             PyTypeObject *cls = Py_TYPE(owner);
-            DEOPT_IF(cls->tp_version_tag != type_version, LOAD_ATTR);
+            DEOPT_IF(cls->tp_version_tag != type_version);
             assert(type_version != 0);
             assert(Py_IS_TYPE(getattribute, &PyFunction_Type));
             PyFunctionObject *f = (PyFunctionObject *)getattribute;
             assert(func_version != 0);
-            DEOPT_IF(f->func_version != func_version, LOAD_ATTR);
+            DEOPT_IF(f->func_version != func_version);
             PyCodeObject *code = (PyCodeObject *)f->func_code;
             assert(code->co_argcount == 2);
-            DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), LOAD_ATTR);
+            DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize));
             STAT_INC(LOAD_ATTR, hit);
 
             PyObject *name = GETITEM(FRAME_CO_NAMES, oparg >> 1);
@@ -2025,7 +2047,7 @@ dummy_func(
         op(_GUARD_DORV_VALUES, (owner -- owner)) {
             assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
             PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
-            DEOPT_IF(!_PyDictOrValues_IsValues(dorv), STORE_ATTR);
+            DEOPT_IF(!_PyDictOrValues_IsValues(dorv));
         }
 
         op(_STORE_ATTR_INSTANCE_VALUE, (index/1, value, owner --)) {
@@ -2045,37 +2067,37 @@ dummy_func(
 
         macro(STORE_ATTR_INSTANCE_VALUE) =
             unused/1 +
-            _GUARD_TYPE_VERSION_STORE +
+            _GUARD_TYPE_VERSION +
             _GUARD_DORV_VALUES +
             _STORE_ATTR_INSTANCE_VALUE;
 
         inst(STORE_ATTR_WITH_HINT, (unused/1, type_version/2, hint/1, value, owner --)) {
             PyTypeObject *tp = Py_TYPE(owner);
             assert(type_version != 0);
-            DEOPT_IF(tp->tp_version_tag != type_version, STORE_ATTR);
+            DEOPT_IF(tp->tp_version_tag != type_version);
             assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT);
             PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
-            DEOPT_IF(_PyDictOrValues_IsValues(dorv), STORE_ATTR);
+            DEOPT_IF(_PyDictOrValues_IsValues(dorv));
             PyDictObject *dict = (PyDictObject *)_PyDictOrValues_GetDict(dorv);
-            DEOPT_IF(dict == NULL, STORE_ATTR);
+            DEOPT_IF(dict == NULL);
             assert(PyDict_CheckExact((PyObject *)dict));
             PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
-            DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries, STORE_ATTR);
+            DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries);
             PyObject *old_value;
             uint64_t new_version;
             if (DK_IS_UNICODE(dict->ma_keys)) {
                 PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint;
-                DEOPT_IF(ep->me_key != name, STORE_ATTR);
+                DEOPT_IF(ep->me_key != name);
                 old_value = ep->me_value;
-                DEOPT_IF(old_value == NULL, STORE_ATTR);
+                DEOPT_IF(old_value == NULL);
                 new_version = _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, value);
                 ep->me_value = value;
             }
             else {
                 PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + hint;
-                DEOPT_IF(ep->me_key != name, STORE_ATTR);
+                DEOPT_IF(ep->me_key != name);
                 old_value = ep->me_value;
-                DEOPT_IF(old_value == NULL, STORE_ATTR);
+                DEOPT_IF(old_value == NULL);
                 new_version = _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, value);
                 ep->me_value = value;
             }
@@ -2090,12 +2112,6 @@ dummy_func(
             Py_DECREF(owner);
         }
 
-        op(_GUARD_TYPE_VERSION_STORE, (type_version/2, owner -- owner)) {
-            PyTypeObject *tp = Py_TYPE(owner);
-            assert(type_version != 0);
-            DEOPT_IF(tp->tp_version_tag != type_version, STORE_ATTR);
-        }
-
         op(_STORE_ATTR_SLOT, (index/1, value, owner --)) {
             char *addr = (char *)owner + index;
             STAT_INC(STORE_ATTR, hit);
@@ -2107,7 +2123,7 @@ dummy_func(
 
         macro(STORE_ATTR_SLOT) =
             unused/1 +
-            _GUARD_TYPE_VERSION_STORE +
+            _GUARD_TYPE_VERSION +
             _STORE_ATTR_SLOT;
 
         family(COMPARE_OP, INLINE_CACHE_ENTRIES_COMPARE_OP) = {
@@ -2140,8 +2156,8 @@ dummy_func(
         }
 
         inst(COMPARE_OP_FLOAT, (unused/1, left, right -- res)) {
-            DEOPT_IF(!PyFloat_CheckExact(left), COMPARE_OP);
-            DEOPT_IF(!PyFloat_CheckExact(right), COMPARE_OP);
+            DEOPT_IF(!PyFloat_CheckExact(left));
+            DEOPT_IF(!PyFloat_CheckExact(right));
             STAT_INC(COMPARE_OP, hit);
             double dleft = PyFloat_AS_DOUBLE(left);
             double dright = PyFloat_AS_DOUBLE(right);
@@ -2155,10 +2171,10 @@ dummy_func(
 
         // Similar to COMPARE_OP_FLOAT
         inst(COMPARE_OP_INT, (unused/1, left, right -- res)) {
-            DEOPT_IF(!PyLong_CheckExact(left), COMPARE_OP);
-            DEOPT_IF(!PyLong_CheckExact(right), COMPARE_OP);
-            DEOPT_IF(!_PyLong_IsCompact((PyLongObject *)left), COMPARE_OP);
-            DEOPT_IF(!_PyLong_IsCompact((PyLongObject *)right), COMPARE_OP);
+            DEOPT_IF(!PyLong_CheckExact(left));
+            DEOPT_IF(!PyLong_CheckExact(right));
+            DEOPT_IF(!_PyLong_IsCompact((PyLongObject *)left));
+            DEOPT_IF(!_PyLong_IsCompact((PyLongObject *)right));
             STAT_INC(COMPARE_OP, hit);
             assert(_PyLong_DigitCount((PyLongObject *)left) <= 1 &&
                    _PyLong_DigitCount((PyLongObject *)right) <= 1);
@@ -2174,8 +2190,8 @@ dummy_func(
 
         // Similar to COMPARE_OP_FLOAT, but for ==, != only
         inst(COMPARE_OP_STR, (unused/1, left, right -- res)) {
-            DEOPT_IF(!PyUnicode_CheckExact(left), COMPARE_OP);
-            DEOPT_IF(!PyUnicode_CheckExact(right), COMPARE_OP);
+            DEOPT_IF(!PyUnicode_CheckExact(left));
+            DEOPT_IF(!PyUnicode_CheckExact(right));
             STAT_INC(COMPARE_OP, hit);
             int eq = _PyUnicode_Equal(left, right);
             assert((oparg >> 5) == Py_EQ || (oparg >> 5) == Py_NE);
@@ -2262,7 +2278,7 @@ dummy_func(
                 // Double-check that the opcode isn't instrumented or something:
                 here->op.code == JUMP_BACKWARD)
             {
-                OBJECT_STAT_INC(optimization_attempts);
+                OPT_STAT_INC(attempts);
                 int optimized = _PyOptimizer_BackEdge(frame, here, next_instr, stack_pointer);
                 ERROR_IF(optimized < 0, error);
                 if (optimized) {
@@ -2492,7 +2508,7 @@ dummy_func(
         }
 
         op(_ITER_CHECK_LIST, (iter -- iter)) {
-            DEOPT_IF(Py_TYPE(iter) != &PyListIter_Type, FOR_ITER);
+            DEOPT_IF(Py_TYPE(iter) != &PyListIter_Type);
         }
 
         op(_ITER_JUMP_LIST, (iter -- iter)) {
@@ -2548,7 +2564,7 @@ dummy_func(
             _ITER_NEXT_LIST;
 
         op(_ITER_CHECK_TUPLE, (iter -- iter)) {
-            DEOPT_IF(Py_TYPE(iter) != &PyTupleIter_Type, FOR_ITER);
+            DEOPT_IF(Py_TYPE(iter) != &PyTupleIter_Type);
         }
 
         op(_ITER_JUMP_TUPLE, (iter -- iter)) {
@@ -2605,7 +2621,7 @@ dummy_func(
 
         op(_ITER_CHECK_RANGE, (iter -- iter)) {
             _PyRangeIterObject *r = (_PyRangeIterObject *)iter;
-            DEOPT_IF(Py_TYPE(r) != &PyRangeIter_Type, FOR_ITER);
+            DEOPT_IF(Py_TYPE(r) != &PyRangeIter_Type);
         }
 
         op(_ITER_JUMP_RANGE, (iter -- iter)) {
@@ -2647,10 +2663,10 @@ dummy_func(
             _ITER_NEXT_RANGE;
 
         inst(FOR_ITER_GEN, (unused/1, iter -- iter, unused)) {
-            DEOPT_IF(tstate->interp->eval_frame, FOR_ITER);
+            DEOPT_IF(tstate->interp->eval_frame);
             PyGenObject *gen = (PyGenObject *)iter;
-            DEOPT_IF(Py_TYPE(gen) != &PyGen_Type, FOR_ITER);
-            DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, FOR_ITER);
+            DEOPT_IF(Py_TYPE(gen) != &PyGen_Type);
+            DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING);
             STAT_INC(FOR_ITER, hit);
             _PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
             _PyFrame_StackPush(gen_frame, Py_None);
@@ -2790,16 +2806,13 @@ dummy_func(
         op(_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT, (owner -- owner)) {
             assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
             PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner);
-            DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) &&
-                     !_PyObject_MakeInstanceAttributesFromDict(owner, dorv),
-                     LOAD_ATTR);
+            DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) && !_PyObject_MakeInstanceAttributesFromDict(owner, dorv));
         }
 
         op(_GUARD_KEYS_VERSION, (keys_version/2, owner -- owner)) {
             PyTypeObject *owner_cls = Py_TYPE(owner);
             PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls;
-            DEOPT_IF(owner_heap_type->ht_cached_keys->dk_version !=
-                     keys_version, LOAD_ATTR);
+            DEOPT_IF(owner_heap_type->ht_cached_keys->dk_version != keys_version);
         }
 
         op(_LOAD_ATTR_METHOD_WITH_VALUES, (descr/4, owner -- attr, self if (1))) {
@@ -2835,46 +2848,46 @@ dummy_func(
             unused/2 +
             _LOAD_ATTR_METHOD_NO_DICT;
 
-        inst(LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES, (unused/1, type_version/2, keys_version/2, descr/4, owner -- attr, unused if (0))) {
+        op(_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES, (descr/4, owner -- attr, unused if (0))) {
             assert((oparg & 1) == 0);
-            PyTypeObject *owner_cls = Py_TYPE(owner);
-            assert(type_version != 0);
-            DEOPT_IF(owner_cls->tp_version_tag != type_version, LOAD_ATTR);
-            assert(owner_cls->tp_flags & Py_TPFLAGS_MANAGED_DICT);
-            PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner);
-            DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) &&
-                     !_PyObject_MakeInstanceAttributesFromDict(owner, dorv),
-                     LOAD_ATTR);
-            PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls;
-            DEOPT_IF(owner_heap_type->ht_cached_keys->dk_version !=
-                     keys_version, LOAD_ATTR);
             STAT_INC(LOAD_ATTR, hit);
             assert(descr != NULL);
             DECREF_INPUTS();
             attr = Py_NewRef(descr);
         }
 
-        inst(LOAD_ATTR_NONDESCRIPTOR_NO_DICT, (unused/1, type_version/2, unused/2, descr/4, owner -- attr, unused if (0))) {
+        macro(LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES) =
+            unused/1 +
+            _GUARD_TYPE_VERSION +
+            _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT +
+            _GUARD_KEYS_VERSION +
+            _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES;
+
+        op(_LOAD_ATTR_NONDESCRIPTOR_NO_DICT, (descr/4, owner -- attr, unused if (0))) {
             assert((oparg & 1) == 0);
-            PyTypeObject *owner_cls = Py_TYPE(owner);
-            assert(type_version != 0);
-            DEOPT_IF(owner_cls->tp_version_tag != type_version, LOAD_ATTR);
-            assert(owner_cls->tp_dictoffset == 0);
+            assert(Py_TYPE(owner)->tp_dictoffset == 0);
             STAT_INC(LOAD_ATTR, hit);
             assert(descr != NULL);
             DECREF_INPUTS();
             attr = Py_NewRef(descr);
         }
 
-        inst(LOAD_ATTR_METHOD_LAZY_DICT, (unused/1, type_version/2, unused/2, descr/4, owner -- attr, self if (1))) {
-            assert(oparg & 1);
-            PyTypeObject *owner_cls = Py_TYPE(owner);
-            DEOPT_IF(owner_cls->tp_version_tag != type_version, LOAD_ATTR);
-            Py_ssize_t dictoffset = owner_cls->tp_dictoffset;
+        macro(LOAD_ATTR_NONDESCRIPTOR_NO_DICT) =
+            unused/1 +
+            _GUARD_TYPE_VERSION +
+            unused/2 +
+            _LOAD_ATTR_NONDESCRIPTOR_NO_DICT;
+
+        op(_CHECK_ATTR_METHOD_LAZY_DICT, (owner -- owner)) {
+            Py_ssize_t dictoffset = Py_TYPE(owner)->tp_dictoffset;
             assert(dictoffset > 0);
             PyObject *dict = *(PyObject **)((char *)owner + dictoffset);
             /* This object has a __dict__, just not yet created */
-            DEOPT_IF(dict != NULL, LOAD_ATTR);
+            DEOPT_IF(dict != NULL);
+        }
+
+        op(_LOAD_ATTR_METHOD_LAZY_DICT, (descr/4, owner -- attr, self if (1))) {
+            assert(oparg & 1);
             STAT_INC(LOAD_ATTR, hit);
             assert(descr != NULL);
             assert(_PyType_HasFeature(Py_TYPE(descr), Py_TPFLAGS_METHOD_DESCRIPTOR));
@@ -2882,6 +2895,13 @@ dummy_func(
             self = owner;
         }
 
+        macro(LOAD_ATTR_METHOD_LAZY_DICT) =
+            unused/1 +
+            _GUARD_TYPE_VERSION +
+            _CHECK_ATTR_METHOD_LAZY_DICT +
+            unused/2 +
+            _LOAD_ATTR_METHOD_LAZY_DICT;
+
         inst(INSTRUMENTED_CALL, ( -- )) {
             int is_meth = PEEK(oparg + 1) != NULL;
             int total_args = oparg + is_meth;
@@ -3002,8 +3022,8 @@ dummy_func(
         }
 
         op(_CHECK_CALL_BOUND_METHOD_EXACT_ARGS, (callable, null, unused[oparg] -- callable, null, unused[oparg])) {
-            DEOPT_IF(null != NULL, CALL);
-            DEOPT_IF(Py_TYPE(callable) != &PyMethod_Type, CALL);
+            DEOPT_IF(null != NULL);
+            DEOPT_IF(Py_TYPE(callable) != &PyMethod_Type);
         }
 
         op(_INIT_CALL_BOUND_METHOD_EXACT_ARGS, (callable, unused, unused[oparg] -- func, self, unused[oparg])) {
@@ -3016,22 +3036,22 @@ dummy_func(
         }
 
         op(_CHECK_PEP_523, (--)) {
-            DEOPT_IF(tstate->interp->eval_frame, CALL);
+            DEOPT_IF(tstate->interp->eval_frame);
         }
 
         op(_CHECK_FUNCTION_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) {
-            DEOPT_IF(!PyFunction_Check(callable), CALL);
+            DEOPT_IF(!PyFunction_Check(callable));
             PyFunctionObject *func = (PyFunctionObject *)callable;
-            DEOPT_IF(func->func_version != func_version, CALL);
+            DEOPT_IF(func->func_version != func_version);
             PyCodeObject *code = (PyCodeObject *)func->func_code;
-            DEOPT_IF(code->co_argcount != oparg + (self_or_null != NULL), CALL);
+            DEOPT_IF(code->co_argcount != oparg + (self_or_null != NULL));
         }
 
         op(_CHECK_STACK_SPACE, (callable, unused, unused[oparg] -- callable, unused, unused[oparg])) {
             PyFunctionObject *func = (PyFunctionObject *)callable;
             PyCodeObject *code = (PyCodeObject *)func->func_code;
-            DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL);
-            DEOPT_IF(tstate->py_recursion_remaining <= 1, CALL);
+            DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize));
+            DEOPT_IF(tstate->py_recursion_remaining <= 1);
         }
 
         op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _PyInterpreterFrame*)) {
@@ -3094,24 +3114,24 @@ dummy_func(
             _PUSH_FRAME;
 
         inst(CALL_PY_WITH_DEFAULTS, (unused/1, func_version/2, callable, self_or_null, args[oparg] -- unused)) {
-            DEOPT_IF(tstate->interp->eval_frame, CALL);
+            DEOPT_IF(tstate->interp->eval_frame);
             int argcount = oparg;
             if (self_or_null != NULL) {
                 args--;
                 argcount++;
             }
-            DEOPT_IF(!PyFunction_Check(callable), CALL);
+            DEOPT_IF(!PyFunction_Check(callable));
             PyFunctionObject *func = (PyFunctionObject *)callable;
-            DEOPT_IF(func->func_version != func_version, CALL);
+            DEOPT_IF(func->func_version != func_version);
             PyCodeObject *code = (PyCodeObject *)func->func_code;
             assert(func->func_defaults);
             assert(PyTuple_CheckExact(func->func_defaults));
             int defcount = (int)PyTuple_GET_SIZE(func->func_defaults);
             assert(defcount <= code->co_argcount);
             int min_args = code->co_argcount - defcount;
-            DEOPT_IF(argcount > code->co_argcount, CALL);
-            DEOPT_IF(argcount < min_args, CALL);
-            DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL);
+            DEOPT_IF(argcount > code->co_argcount);
+            DEOPT_IF(argcount < min_args);
+            DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize));
             STAT_INC(CALL, hit);
             _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, func, code->co_argcount);
             for (int i = 0; i < argcount; i++) {
@@ -3130,9 +3150,9 @@ dummy_func(
 
         inst(CALL_TYPE_1, (unused/1, unused/2, callable, null, args[oparg] -- res)) {
             assert(oparg == 1);
-            DEOPT_IF(null != NULL, CALL);
+            DEOPT_IF(null != NULL);
             PyObject *obj = args[0];
-            DEOPT_IF(callable != (PyObject *)&PyType_Type, CALL);
+            DEOPT_IF(callable != (PyObject *)&PyType_Type);
             STAT_INC(CALL, hit);
             res = Py_NewRef(Py_TYPE(obj));
             Py_DECREF(obj);
@@ -3141,8 +3161,8 @@ dummy_func(
 
         inst(CALL_STR_1, (unused/1, unused/2, callable, null, args[oparg] -- res)) {
             assert(oparg == 1);
-            DEOPT_IF(null != NULL, CALL);
-            DEOPT_IF(callable != (PyObject *)&PyUnicode_Type, CALL);
+            DEOPT_IF(null != NULL);
+            DEOPT_IF(callable != (PyObject *)&PyUnicode_Type);
             STAT_INC(CALL, hit);
             PyObject *arg = args[0];
             res = PyObject_Str(arg);
@@ -3154,8 +3174,8 @@ dummy_func(
 
         inst(CALL_TUPLE_1, (unused/1, unused/2, callable, null, args[oparg] -- res)) {
             assert(oparg == 1);
-            DEOPT_IF(null != NULL, CALL);
-            DEOPT_IF(callable != (PyObject *)&PyTuple_Type, CALL);
+            DEOPT_IF(null != NULL);
+            DEOPT_IF(callable != (PyObject *)&PyTuple_Type);
             STAT_INC(CALL, hit);
             PyObject *arg = args[0];
             res = PySequence_Tuple(arg);
@@ -3172,15 +3192,15 @@ dummy_func(
              * 3. Pushes the frame for ``__init__`` to the frame stack
              * */
             _PyCallCache *cache = (_PyCallCache *)next_instr;
-            DEOPT_IF(null != NULL, CALL);
-            DEOPT_IF(!PyType_Check(callable), CALL);
+            DEOPT_IF(null != NULL);
+            DEOPT_IF(!PyType_Check(callable));
             PyTypeObject *tp = (PyTypeObject *)callable;
-            DEOPT_IF(tp->tp_version_tag != read_u32(cache->func_version), CALL);
+            DEOPT_IF(tp->tp_version_tag != read_u32(cache->func_version));
             PyHeapTypeObject *cls = (PyHeapTypeObject *)callable;
             PyFunctionObject *init = (PyFunctionObject *)cls->_spec_cache.init;
             PyCodeObject *code = (PyCodeObject *)init->func_code;
-            DEOPT_IF(code->co_argcount != oparg+1, CALL);
-            DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize), CALL);
+            DEOPT_IF(code->co_argcount != oparg+1);
+            DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize));
             STAT_INC(CALL, hit);
             PyObject *self = _PyType_NewManagedObject(tp);
             if (self == NULL) {
@@ -3233,9 +3253,9 @@ dummy_func(
                 args--;
                 total_args++;
             }
-            DEOPT_IF(!PyType_Check(callable), CALL);
+            DEOPT_IF(!PyType_Check(callable));
             PyTypeObject *tp = (PyTypeObject *)callable;
-            DEOPT_IF(tp->tp_vectorcall == NULL, CALL);
+            DEOPT_IF(tp->tp_vectorcall == NULL);
             STAT_INC(CALL, hit);
             res = tp->tp_vectorcall((PyObject *)tp, args, total_args, NULL);
             /* Free the arguments. */
@@ -3254,9 +3274,9 @@ dummy_func(
                 args--;
                 total_args++;
             }
-            DEOPT_IF(total_args != 1, CALL);
-            DEOPT_IF(!PyCFunction_CheckExact(callable), CALL);
-            DEOPT_IF(PyCFunction_GET_FLAGS(callable) != METH_O, CALL);
+            DEOPT_IF(total_args != 1);
+            DEOPT_IF(!PyCFunction_CheckExact(callable));
+            DEOPT_IF(PyCFunction_GET_FLAGS(callable) != METH_O);
             STAT_INC(CALL, hit);
             PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable);
             // This is slower but CPython promises to check all non-vectorcall
@@ -3282,8 +3302,8 @@ dummy_func(
                 args--;
                 total_args++;
             }
-            DEOPT_IF(!PyCFunction_CheckExact(callable), CALL);
-            DEOPT_IF(PyCFunction_GET_FLAGS(callable) != METH_FASTCALL, CALL);
+            DEOPT_IF(!PyCFunction_CheckExact(callable));
+            DEOPT_IF(PyCFunction_GET_FLAGS(callable) != METH_FASTCALL);
             STAT_INC(CALL, hit);
             PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable);
             /* res = func(self, args, nargs) */
@@ -3314,9 +3334,8 @@ dummy_func(
                 args--;
                 total_args++;
             }
-            DEOPT_IF(!PyCFunction_CheckExact(callable), CALL);
-            DEOPT_IF(PyCFunction_GET_FLAGS(callable) !=
-                (METH_FASTCALL | METH_KEYWORDS), CALL);
+            DEOPT_IF(!PyCFunction_CheckExact(callable));
+            DEOPT_IF(PyCFunction_GET_FLAGS(callable) != (METH_FASTCALL | METH_KEYWORDS));
             STAT_INC(CALL, hit);
             /* res = func(self, args, nargs, kwnames) */
             _PyCFunctionFastWithKeywords cfunc =
@@ -3341,9 +3360,9 @@ dummy_func(
                 args--;
                 total_args++;
             }
-            DEOPT_IF(total_args != 1, CALL);
+            DEOPT_IF(total_args != 1);
             PyInterpreterState *interp = tstate->interp;
-            DEOPT_IF(callable != interp->callable_cache.len, CALL);
+            DEOPT_IF(callable != interp->callable_cache.len);
             STAT_INC(CALL, hit);
             PyObject *arg = args[0];
             Py_ssize_t len_i = PyObject_Length(arg);
@@ -3365,9 +3384,9 @@ dummy_func(
                 args--;
                 total_args++;
             }
-            DEOPT_IF(total_args != 2, CALL);
+            DEOPT_IF(total_args != 2);
             PyInterpreterState *interp = tstate->interp;
-            DEOPT_IF(callable != interp->callable_cache.isinstance, CALL);
+            DEOPT_IF(callable != interp->callable_cache.isinstance);
             STAT_INC(CALL, hit);
             PyObject *cls = args[1];
             PyObject *inst = args[0];
@@ -3388,9 +3407,9 @@ dummy_func(
         inst(CALL_LIST_APPEND, (unused/1, unused/2, callable, self, args[oparg] -- unused)) {
             assert(oparg == 1);
             PyInterpreterState *interp = tstate->interp;
-            DEOPT_IF(callable != interp->callable_cache.list_append, CALL);
+            DEOPT_IF(callable != interp->callable_cache.list_append);
             assert(self != NULL);
-            DEOPT_IF(!PyList_Check(self), CALL);
+            DEOPT_IF(!PyList_Check(self));
             STAT_INC(CALL, hit);
             if (_PyList_AppendTakeRef((PyListObject *)self, args[0]) < 0) {
                 goto pop_1_error;  // Since arg is DECREF'ed already
@@ -3411,13 +3430,13 @@ dummy_func(
                 total_args++;
             }
             PyMethodDescrObject *method = (PyMethodDescrObject *)callable;
-            DEOPT_IF(total_args != 2, CALL);
-            DEOPT_IF(!Py_IS_TYPE(method, &PyMethodDescr_Type), CALL);
+            DEOPT_IF(total_args != 2);
+            DEOPT_IF(!Py_IS_TYPE(method, &PyMethodDescr_Type));
             PyMethodDef *meth = method->d_method;
-            DEOPT_IF(meth->ml_flags != METH_O, CALL);
+            DEOPT_IF(meth->ml_flags != METH_O);
             PyObject *arg = args[1];
             PyObject *self = args[0];
-            DEOPT_IF(!Py_IS_TYPE(self, method->d_common.d_type), CALL);
+            DEOPT_IF(!Py_IS_TYPE(self, method->d_common.d_type));
             STAT_INC(CALL, hit);
             PyCFunction cfunc = meth->ml_meth;
             // This is slower but CPython promises to check all non-vectorcall
@@ -3442,12 +3461,12 @@ dummy_func(
                 total_args++;
             }
             PyMethodDescrObject *method = (PyMethodDescrObject *)callable;
-            DEOPT_IF(!Py_IS_TYPE(method, &PyMethodDescr_Type), CALL);
+            DEOPT_IF(!Py_IS_TYPE(method, &PyMethodDescr_Type));
             PyMethodDef *meth = method->d_method;
-            DEOPT_IF(meth->ml_flags != (METH_FASTCALL|METH_KEYWORDS), CALL);
+            DEOPT_IF(meth->ml_flags != (METH_FASTCALL|METH_KEYWORDS));
             PyTypeObject *d_type = method->d_common.d_type;
             PyObject *self = args[0];
-            DEOPT_IF(!Py_IS_TYPE(self, d_type), CALL);
+            DEOPT_IF(!Py_IS_TYPE(self, d_type));
             STAT_INC(CALL, hit);
             int nargs = total_args - 1;
             _PyCFunctionFastWithKeywords cfunc =
@@ -3471,13 +3490,13 @@ dummy_func(
                 args--;
                 total_args++;
             }
-            DEOPT_IF(total_args != 1, CALL);
+            DEOPT_IF(total_args != 1);
             PyMethodDescrObject *method = (PyMethodDescrObject *)callable;
-            DEOPT_IF(!Py_IS_TYPE(method, &PyMethodDescr_Type), CALL);
+            DEOPT_IF(!Py_IS_TYPE(method, &PyMethodDescr_Type));
             PyMethodDef *meth = method->d_method;
             PyObject *self = args[0];
-            DEOPT_IF(!Py_IS_TYPE(self, method->d_common.d_type), CALL);
-            DEOPT_IF(meth->ml_flags != METH_NOARGS, CALL);
+            DEOPT_IF(!Py_IS_TYPE(self, method->d_common.d_type));
+            DEOPT_IF(meth->ml_flags != METH_NOARGS);
             STAT_INC(CALL, hit);
             PyCFunction cfunc = meth->ml_meth;
             // This is slower but CPython promises to check all non-vectorcall
@@ -3502,11 +3521,11 @@ dummy_func(
             }
             PyMethodDescrObject *method = (PyMethodDescrObject *)callable;
             /* Builtin METH_FASTCALL methods, without keywords */
-            DEOPT_IF(!Py_IS_TYPE(method, &PyMethodDescr_Type), CALL);
+            DEOPT_IF(!Py_IS_TYPE(method, &PyMethodDescr_Type));
             PyMethodDef *meth = method->d_method;
-            DEOPT_IF(meth->ml_flags != METH_FASTCALL, CALL);
+            DEOPT_IF(meth->ml_flags != METH_FASTCALL);
             PyObject *self = args[0];
-            DEOPT_IF(!Py_IS_TYPE(self, method->d_common.d_type), CALL);
+            DEOPT_IF(!Py_IS_TYPE(self, method->d_common.d_type));
             STAT_INC(CALL, hit);
             _PyCFunctionFast cfunc =
                 (_PyCFunctionFast)(void(*)(void))meth->ml_meth;
@@ -3946,6 +3965,7 @@ dummy_func(
             frame->prev_instr--;  // Back up to just before destination
             _PyFrame_SetStackPointer(frame, stack_pointer);
             Py_DECREF(self);
+            OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
             return frame;
         }
 
diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c
index ba16f5eb9bfe74..f237e384333e45 100644
--- a/Python/ceval_gil.c
+++ b/Python/ceval_gil.c
@@ -57,113 +57,62 @@
 #define _Py_atomic_load_relaxed_int32(ATOMIC_VAL) _Py_atomic_load_relaxed(ATOMIC_VAL)
 #endif
 
-/* This can set eval_breaker to 0 even though gil_drop_request became
-   1.  We believe this is all right because the eval loop will release
-   the GIL eventually anyway. */
+/* bpo-40010: eval_breaker should be recomputed if there is a pending
+   signal, i.e. a signal received by another thread that cannot handle
+   signals.  Similarly, the CALLS_TO_DO and ASYNC_EXCEPTION bits are set
+   to match the given thread state.
+*/
 static inline void
-COMPUTE_EVAL_BREAKER(PyInterpreterState *interp,
-                     struct _ceval_runtime_state *ceval,
-                     struct _ceval_state *ceval2)
+update_eval_breaker_from_thread(PyInterpreterState *interp, PyThreadState *tstate)
 {
-    _Py_atomic_store_relaxed(&ceval2->eval_breaker,
-        _Py_atomic_load_relaxed_int32(&ceval2->gil_drop_request)
-        | (_Py_atomic_load_relaxed_int32(&ceval->signals_pending)
-           && _Py_ThreadCanHandleSignals(interp))
-        | (_Py_atomic_load_relaxed_int32(&ceval2->pending.calls_to_do))
-        | (_Py_IsMainThread() && _Py_IsMainInterpreter(interp)
-           &&_Py_atomic_load_relaxed_int32(&ceval->pending_mainthread.calls_to_do))
-        | ceval2->pending.async_exc
-        | _Py_atomic_load_relaxed_int32(&ceval2->gc_scheduled));
-}
+    if (tstate == NULL) {
+        return;
+    }
 
+    if (_Py_IsMainThread()) {
+        int32_t calls_to_do = _Py_atomic_load_int32_relaxed(
+            &_PyRuntime.ceval.pending_mainthread.calls_to_do);
+        if (calls_to_do) {
+            _Py_set_eval_breaker_bit(interp, _PY_CALLS_TO_DO_BIT, 1);
+        }
+        if (_Py_ThreadCanHandleSignals(interp)) {
+            if (_Py_atomic_load(&_PyRuntime.signals.is_tripped)) {
+                _Py_set_eval_breaker_bit(interp, _PY_SIGNALS_PENDING_BIT, 1);
+            }
+        }
+    }
+    if (tstate->async_exc != NULL) {
+        _Py_set_eval_breaker_bit(interp, _PY_ASYNC_EXCEPTION_BIT, 1);
+    }
+}
 
 static inline void
 SET_GIL_DROP_REQUEST(PyInterpreterState *interp)
 {
-    struct _ceval_state *ceval2 = &interp->ceval;
-    _Py_atomic_store_relaxed(&ceval2->gil_drop_request, 1);
-    _Py_atomic_store_relaxed(&ceval2->eval_breaker, 1);
+    _Py_set_eval_breaker_bit(interp, _PY_GIL_DROP_REQUEST_BIT, 1);
 }
 
 
 static inline void
 RESET_GIL_DROP_REQUEST(PyInterpreterState *interp)
 {
-    struct _ceval_runtime_state *ceval = &interp->runtime->ceval;
-    struct _ceval_state *ceval2 = &interp->ceval;
-    _Py_atomic_store_relaxed(&ceval2->gil_drop_request, 0);
-    COMPUTE_EVAL_BREAKER(interp, ceval, ceval2);
+    _Py_set_eval_breaker_bit(interp, _PY_GIL_DROP_REQUEST_BIT, 0);
 }
 
 
 static inline void
-SIGNAL_PENDING_CALLS(struct _pending_calls *pending, PyInterpreterState *interp)
+SIGNAL_PENDING_CALLS(PyInterpreterState *interp)
 {
-    struct _ceval_runtime_state *ceval = &interp->runtime->ceval;
-    struct _ceval_state *ceval2 = &interp->ceval;
-    _Py_atomic_store_relaxed(&pending->calls_to_do, 1);
-    COMPUTE_EVAL_BREAKER(interp, ceval, ceval2);
+    _Py_set_eval_breaker_bit(interp, _PY_CALLS_TO_DO_BIT, 1);
 }
 
 
 static inline void
 UNSIGNAL_PENDING_CALLS(PyInterpreterState *interp)
 {
-    struct _ceval_runtime_state *ceval = &interp->runtime->ceval;
-    struct _ceval_state *ceval2 = &interp->ceval;
-    if (_Py_IsMainThread() && _Py_IsMainInterpreter(interp)) {
-        _Py_atomic_store_relaxed(&ceval->pending_mainthread.calls_to_do, 0);
-    }
-    _Py_atomic_store_relaxed(&ceval2->pending.calls_to_do, 0);
-    COMPUTE_EVAL_BREAKER(interp, ceval, ceval2);
+    _Py_set_eval_breaker_bit(interp, _PY_CALLS_TO_DO_BIT, 0);
 }
 
-
-static inline void
-SIGNAL_PENDING_SIGNALS(PyInterpreterState *interp, int force)
-{
-    struct _ceval_runtime_state *ceval = &interp->runtime->ceval;
-    struct _ceval_state *ceval2 = &interp->ceval;
-    _Py_atomic_store_relaxed(&ceval->signals_pending, 1);
-    if (force) {
-        _Py_atomic_store_relaxed(&ceval2->eval_breaker, 1);
-    }
-    else {
-        /* eval_breaker is not set to 1 if thread_can_handle_signals() is false */
-        COMPUTE_EVAL_BREAKER(interp, ceval, ceval2);
-    }
-}
-
-
-static inline void
-UNSIGNAL_PENDING_SIGNALS(PyInterpreterState *interp)
-{
-    struct _ceval_runtime_state *ceval = &interp->runtime->ceval;
-    struct _ceval_state *ceval2 = &interp->ceval;
-    _Py_atomic_store_relaxed(&ceval->signals_pending, 0);
-    COMPUTE_EVAL_BREAKER(interp, ceval, ceval2);
-}
-
-
-static inline void
-SIGNAL_ASYNC_EXC(PyInterpreterState *interp)
-{
-    struct _ceval_state *ceval2 = &interp->ceval;
-    ceval2->pending.async_exc = 1;
-    _Py_atomic_store_relaxed(&ceval2->eval_breaker, 1);
-}
-
-
-static inline void
-UNSIGNAL_ASYNC_EXC(PyInterpreterState *interp)
-{
-    struct _ceval_runtime_state *ceval = &interp->runtime->ceval;
-    struct _ceval_state *ceval2 = &interp->ceval;
-    ceval2->pending.async_exc = 0;
-    COMPUTE_EVAL_BREAKER(interp, ceval, ceval2);
-}
-
-
 /*
  * Implementation of the Global Interpreter Lock (GIL).
  */
@@ -271,8 +220,9 @@ static void recreate_gil(struct _gil_runtime_state *gil)
 #endif
 
 static void
-drop_gil(struct _ceval_state *ceval, PyThreadState *tstate)
+drop_gil(PyInterpreterState *interp, PyThreadState *tstate)
 {
+    struct _ceval_state *ceval = &interp->ceval;
     /* If tstate is NULL, the caller is indicating that we're releasing
        the GIL for the last time in this thread.  This is particularly
        relevant when the current thread state is finalizing or its
@@ -310,7 +260,7 @@ drop_gil(struct _ceval_state *ceval, PyThreadState *tstate)
        the GIL, and that's the only time we might delete the
        interpreter, so checking tstate first prevents the crash.
        See https://github.com/python/cpython/issues/104341. */
-    if (tstate != NULL && _Py_atomic_load_relaxed(&ceval->gil_drop_request)) {
+    if (tstate != NULL && _Py_eval_breaker_bit_is_set(interp, _PY_GIL_DROP_REQUEST_BIT)) {
         MUTEX_LOCK(gil->switch_mutex);
         /* Not switched yet => wait */
         if (((PyThreadState*)_Py_atomic_load_relaxed(&gil->last_holder)) == tstate)
@@ -356,8 +306,7 @@ take_gil(PyThreadState *tstate)
 
     assert(_PyThreadState_CheckConsistency(tstate));
     PyInterpreterState *interp = tstate->interp;
-    struct _ceval_state *ceval = &interp->ceval;
-    struct _gil_runtime_state *gil = ceval->gil;
+    struct _gil_runtime_state *gil = interp->ceval.gil;
 
     /* Check that _PyEval_InitThreads() was called to create the lock */
     assert(gil_created(gil));
@@ -431,27 +380,13 @@ take_gil(PyThreadState *tstate)
            in take_gil() while the main thread called
            wait_for_thread_shutdown() from Py_Finalize(). */
         MUTEX_UNLOCK(gil->mutex);
-        drop_gil(ceval, tstate);
+        drop_gil(interp, tstate);
         PyThread_exit_thread();
     }
     assert(_PyThreadState_CheckConsistency(tstate));
 
-    if (_Py_atomic_load_relaxed(&ceval->gil_drop_request)) {
-        RESET_GIL_DROP_REQUEST(interp);
-    }
-    else {
-        /* bpo-40010: eval_breaker should be recomputed to be set to 1 if there
-           is a pending signal: signal received by another thread which cannot
-           handle signals.
-
-           Note: RESET_GIL_DROP_REQUEST() calls COMPUTE_EVAL_BREAKER(). */
-        COMPUTE_EVAL_BREAKER(interp, &_PyRuntime.ceval, ceval);
-    }
-
-    /* Don't access tstate if the thread must exit */
-    if (tstate->async_exc != NULL) {
-        _PyEval_SignalAsyncExc(tstate->interp);
-    }
+    RESET_GIL_DROP_REQUEST(interp);
+    update_eval_breaker_from_thread(interp, tstate);
 
     MUTEX_UNLOCK(gil->mutex);
 
@@ -527,24 +462,22 @@ PyStatus
 _PyEval_InitGIL(PyThreadState *tstate, int own_gil)
 {
     assert(tstate->interp->ceval.gil == NULL);
-    int locked;
     if (!own_gil) {
         /* The interpreter will share the main interpreter's instead. */
         PyInterpreterState *main_interp = _PyInterpreterState_Main();
         assert(tstate->interp != main_interp);
         struct _gil_runtime_state *gil = main_interp->ceval.gil;
         init_shared_gil(tstate->interp, gil);
-        locked = current_thread_holds_gil(gil, tstate);
+        assert(!current_thread_holds_gil(gil, tstate));
     }
     else {
         PyThread_init_thread();
         init_own_gil(tstate->interp, &tstate->interp->_gil);
-        locked = 0;
-    }
-    if (!locked) {
-        take_gil(tstate);
     }
 
+    // Lock the GIL and mark the current thread as attached.
+    _PyThreadState_Attach(tstate);
+
     return _PyStatus_OK();
 }
 
@@ -611,8 +544,7 @@ PyEval_ReleaseLock(void)
     /* This function must succeed when the current thread state is NULL.
        We therefore avoid PyThreadState_Get() which dumps a fatal error
        in debug mode. */
-    struct _ceval_state *ceval = &tstate->interp->ceval;
-    drop_gil(ceval, tstate);
+    drop_gil(tstate->interp, tstate);
 }
 
 void
@@ -628,33 +560,21 @@ _PyEval_ReleaseLock(PyInterpreterState *interp, PyThreadState *tstate)
     /* If tstate is NULL then we do not expect the current thread
        to acquire the GIL ever again. */
     assert(tstate == NULL || tstate->interp == interp);
-    struct _ceval_state *ceval = &interp->ceval;
-    drop_gil(ceval, tstate);
+    drop_gil(interp, tstate);
 }
 
 void
 PyEval_AcquireThread(PyThreadState *tstate)
 {
     _Py_EnsureTstateNotNULL(tstate);
-
-    take_gil(tstate);
-
-    if (_PyThreadState_SwapNoGIL(tstate) != NULL) {
-        Py_FatalError("non-NULL old thread state");
-    }
+    _PyThreadState_Attach(tstate);
 }
 
 void
 PyEval_ReleaseThread(PyThreadState *tstate)
 {
     assert(_PyThreadState_CheckConsistency(tstate));
-
-    PyThreadState *new_tstate = _PyThreadState_SwapNoGIL(NULL);
-    if (new_tstate != tstate) {
-        Py_FatalError("wrong thread state");
-    }
-    struct _ceval_state *ceval = &tstate->interp->ceval;
-    drop_gil(ceval, tstate);
+    _PyThreadState_Detach(tstate);
 }
 
 #ifdef HAVE_FORK
@@ -691,18 +611,14 @@ _PyEval_ReInitThreads(PyThreadState *tstate)
 void
 _PyEval_SignalAsyncExc(PyInterpreterState *interp)
 {
-    SIGNAL_ASYNC_EXC(interp);
+    _Py_set_eval_breaker_bit(interp, _PY_ASYNC_EXCEPTION_BIT, 1);
 }
 
 PyThreadState *
 PyEval_SaveThread(void)
 {
-    PyThreadState *tstate = _PyThreadState_SwapNoGIL(NULL);
-    _Py_EnsureTstateNotNULL(tstate);
-
-    struct _ceval_state *ceval = &tstate->interp->ceval;
-    assert(gil_created(ceval->gil));
-    drop_gil(ceval, tstate);
+    PyThreadState *tstate = _PyThreadState_GET();
+    _PyThreadState_Detach(tstate);
     return tstate;
 }
 
@@ -710,10 +626,7 @@ void
 PyEval_RestoreThread(PyThreadState *tstate)
 {
     _Py_EnsureTstateNotNULL(tstate);
-
-    take_gil(tstate);
-
-    _PyThreadState_SwapNoGIL(tstate);
+    _PyThreadState_Attach(tstate);
 }
 
 
@@ -742,22 +655,9 @@ PyEval_RestoreThread(PyThreadState *tstate)
 void
 _PyEval_SignalReceived(PyInterpreterState *interp)
 {
-#ifdef MS_WINDOWS
-    // bpo-42296: On Windows, _PyEval_SignalReceived() is called from a signal
-    // handler which can run in a thread different than the Python thread, in
-    // which case _Py_ThreadCanHandleSignals() is wrong. Ignore
-    // _Py_ThreadCanHandleSignals() and always set eval_breaker to 1.
-    //
-    // The next eval_frame_handle_pending() call will call
-    // _Py_ThreadCanHandleSignals() to recompute eval_breaker.
-    int force = 1;
-#else
-    int force = 0;
-#endif
-    /* bpo-30703: Function called when the C signal handler of Python gets a
-       signal. We cannot queue a callback using _PyEval_AddPendingCall() since
-       that function is not async-signal-safe. */
-    SIGNAL_PENDING_SIGNALS(interp, force);
+    if (_Py_ThreadCanHandleSignals(interp)) {
+        _Py_set_eval_breaker_bit(interp, _PY_SIGNALS_PENDING_BIT, 1);
+    }
 }
 
 /* Push one item onto the queue while holding the lock. */
@@ -773,6 +673,8 @@ _push_pending_call(struct _pending_calls *pending,
     pending->calls[i].func = func;
     pending->calls[i].arg = arg;
     pending->last = j;
+    assert(pending->calls_to_do < NPENDINGCALLS);
+    pending->calls_to_do++;
     return 0;
 }
 
@@ -800,6 +702,8 @@ _pop_pending_call(struct _pending_calls *pending,
     if (i >= 0) {
         pending->calls[i] = (struct _pending_call){0};
         pending->first = (i + 1) % NPENDINGCALLS;
+        assert(pending->calls_to_do > 0);
+        pending->calls_to_do--;
     }
 }
 
@@ -829,7 +733,7 @@ _PyEval_AddPendingCall(PyInterpreterState *interp,
     PyThread_release_lock(pending->lock);
 
     /* signal main loop */
-    SIGNAL_PENDING_CALLS(pending, interp);
+    SIGNAL_PENDING_CALLS(interp);
     return result;
 }
 
@@ -846,33 +750,18 @@ static int
 handle_signals(PyThreadState *tstate)
 {
     assert(_PyThreadState_CheckConsistency(tstate));
+    _Py_set_eval_breaker_bit(tstate->interp, _PY_SIGNALS_PENDING_BIT, 0);
     if (!_Py_ThreadCanHandleSignals(tstate->interp)) {
         return 0;
     }
-
-    UNSIGNAL_PENDING_SIGNALS(tstate->interp);
     if (_PyErr_CheckSignalsTstate(tstate) < 0) {
         /* On failure, re-schedule a call to handle_signals(). */
-        SIGNAL_PENDING_SIGNALS(tstate->interp, 0);
+        _Py_set_eval_breaker_bit(tstate->interp, _PY_SIGNALS_PENDING_BIT, 1);
         return -1;
     }
     return 0;
 }
 
-static inline int
-maybe_has_pending_calls(PyInterpreterState *interp)
-{
-    struct _pending_calls *pending = &interp->ceval.pending;
-    if (_Py_atomic_load_relaxed_int32(&pending->calls_to_do)) {
-        return 1;
-    }
-    if (!_Py_IsMainThread() || !_Py_IsMainInterpreter(interp)) {
-        return 0;
-    }
-    pending = &_PyRuntime.ceval.pending_mainthread;
-    return _Py_atomic_load_relaxed_int32(&pending->calls_to_do);
-}
-
 static int
 _make_pending_calls(struct _pending_calls *pending)
 {
@@ -930,7 +819,7 @@ make_pending_calls(PyInterpreterState *interp)
     if (_make_pending_calls(pending) != 0) {
         pending->busy = 0;
         /* There might not be more calls to make, but we play it safe. */
-        SIGNAL_PENDING_CALLS(pending, interp);
+        SIGNAL_PENDING_CALLS(interp);
         return -1;
     }
 
@@ -938,7 +827,7 @@ make_pending_calls(PyInterpreterState *interp)
         if (_make_pending_calls(pending_main) != 0) {
             pending->busy = 0;
             /* There might not be more calls to make, but we play it safe. */
-            SIGNAL_PENDING_CALLS(pending_main, interp);
+            SIGNAL_PENDING_CALLS(interp);
             return -1;
         }
     }
@@ -1083,70 +972,49 @@ _PyEval_FiniState(struct _ceval_state *ceval)
 int
 _Py_HandlePending(PyThreadState *tstate)
 {
-    _PyRuntimeState * const runtime = &_PyRuntime;
-    struct _ceval_runtime_state *ceval = &runtime->ceval;
-    struct _ceval_state *interp_ceval_state = &tstate->interp->ceval;
+    PyInterpreterState *interp = tstate->interp;
 
     /* Pending signals */
-    if (_Py_atomic_load_relaxed_int32(&ceval->signals_pending)) {
+    if (_Py_eval_breaker_bit_is_set(interp, _PY_SIGNALS_PENDING_BIT)) {
         if (handle_signals(tstate) != 0) {
             return -1;
         }
     }
 
     /* Pending calls */
-    if (maybe_has_pending_calls(tstate->interp)) {
-        if (make_pending_calls(tstate->interp) != 0) {
+    if (_Py_eval_breaker_bit_is_set(interp, _PY_CALLS_TO_DO_BIT)) {
+        if (make_pending_calls(interp) != 0) {
             return -1;
         }
     }
 
     /* GC scheduled to run */
-    if (_Py_atomic_load_relaxed_int32(&interp_ceval_state->gc_scheduled)) {
-        _Py_atomic_store_relaxed(&interp_ceval_state->gc_scheduled, 0);
-        COMPUTE_EVAL_BREAKER(tstate->interp, ceval, interp_ceval_state);
+    if (_Py_eval_breaker_bit_is_set(interp, _PY_GC_SCHEDULED_BIT)) {
+        _Py_set_eval_breaker_bit(interp, _PY_GC_SCHEDULED_BIT, 0);
         _Py_RunGC(tstate);
     }
 
     /* GIL drop request */
-    if (_Py_atomic_load_relaxed_int32(&interp_ceval_state->gil_drop_request)) {
+    if (_Py_eval_breaker_bit_is_set(interp, _PY_GIL_DROP_REQUEST_BIT)) {
         /* Give another thread a chance */
-        if (_PyThreadState_SwapNoGIL(NULL) != tstate) {
-            Py_FatalError("tstate mix-up");
-        }
-        drop_gil(interp_ceval_state, tstate);
+        _PyThreadState_Detach(tstate);
 
         /* Other threads may run now */
 
-        take_gil(tstate);
-
-        if (_PyThreadState_SwapNoGIL(tstate) != NULL) {
-            Py_FatalError("orphan tstate");
-        }
+        _PyThreadState_Attach(tstate);
     }
 
     /* Check for asynchronous exception. */
-    if (tstate->async_exc != NULL) {
-        PyObject *exc = tstate->async_exc;
-        tstate->async_exc = NULL;
-        UNSIGNAL_ASYNC_EXC(tstate->interp);
-        _PyErr_SetNone(tstate, exc);
-        Py_DECREF(exc);
-        return -1;
+    if (_Py_eval_breaker_bit_is_set(interp, _PY_ASYNC_EXCEPTION_BIT)) {
+        _Py_set_eval_breaker_bit(interp, _PY_ASYNC_EXCEPTION_BIT, 0);
+        if (tstate->async_exc != NULL) {
+            PyObject *exc = tstate->async_exc;
+            tstate->async_exc = NULL;
+            _PyErr_SetNone(tstate, exc);
+            Py_DECREF(exc);
+            return -1;
+        }
     }
-
-
-    // It is possible that some of the conditions that trigger the eval breaker
-    // are called in a different thread than the Python thread. An example of
-    // this is bpo-42296: On Windows, _PyEval_SignalReceived() can be called in
-    // a different thread than the Python thread, in which case
-    // _Py_ThreadCanHandleSignals() is wrong. Recompute eval_breaker in the
-    // current Python thread with the correct _Py_ThreadCanHandleSignals()
-    // value. It prevents to interrupt the eval loop at every instruction if
-    // the current Python thread cannot handle signals (if
-    // _Py_ThreadCanHandleSignals() is false).
-    COMPUTE_EVAL_BREAKER(tstate->interp, ceval, interp_ceval_state);
-
     return 0;
 }
 
diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h
index 012750df387c1c..872e0a2b7f92ca 100644
--- a/Python/ceval_macros.h
+++ b/Python/ceval_macros.h
@@ -116,7 +116,7 @@
 
 #define CHECK_EVAL_BREAKER() \
     _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); \
-    if (_Py_atomic_load_relaxed_int32(&tstate->interp->ceval.eval_breaker)) { \
+    if (_Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & _PY_EVAL_EVENTS_MASK) { \
         if (_Py_HandlePending(tstate) != 0) { \
             goto error; \
         } \
diff --git a/Python/condvar.h b/Python/condvar.h
index 4ddc5311cf8fad..d54db94f2c871d 100644
--- a/Python/condvar.h
+++ b/Python/condvar.h
@@ -41,7 +41,8 @@
 #define _CONDVAR_IMPL_H_
 
 #include "Python.h"
-#include "pycore_condvar.h"
+#include "pycore_pythread.h"      // _POSIX_THREADS
+
 
 #ifdef _POSIX_THREADS
 /*
diff --git a/Python/dynload_hpux.c b/Python/dynload_hpux.c
index a53373038ed859..1c44722ff9a2d0 100644
--- a/Python/dynload_hpux.c
+++ b/Python/dynload_hpux.c
@@ -5,7 +5,7 @@
 #include <errno.h>
 
 #include "Python.h"
-#include "importdl.h"
+#include "pycore_importdl.h"
 
 #if defined(__hp9000s300)
 #define FUNCNAME_PATTERN "_%.20s_%.200s"
diff --git a/Python/dynload_shlib.c b/Python/dynload_shlib.c
index 6761bba457983b..5a37a83805ba78 100644
--- a/Python/dynload_shlib.c
+++ b/Python/dynload_shlib.c
@@ -4,7 +4,7 @@
 #include "Python.h"
 #include "pycore_interp.h"    // _PyInterpreterState.dlopenflags
 #include "pycore_pystate.h"   // _PyInterpreterState_GET()
-#include "importdl.h"
+#include "pycore_importdl.h"
 
 #include <sys/types.h>
 #include <sys/stat.h>
diff --git a/Python/dynload_stub.c b/Python/dynload_stub.c
index 59160483caa448..11f7e5f643f79e 100644
--- a/Python/dynload_stub.c
+++ b/Python/dynload_stub.c
@@ -3,7 +3,7 @@
    not present. */
 
 #include "Python.h"
-#include "importdl.h"
+#include "pycore_importdl.h"
 
 
 const char *_PyImport_DynLoadFiletab[] = {NULL};
diff --git a/Python/dynload_win.c b/Python/dynload_win.c
index fcb3cb744047ce..a0ac31c80a5f6e 100644
--- a/Python/dynload_win.c
+++ b/Python/dynload_win.c
@@ -5,30 +5,10 @@
 #include "pycore_fileutils.h"     // _Py_add_relfile()
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
 
-#include "importdl.h"             // dl_funcptr
+#include "pycore_importdl.h"      // dl_funcptr
 #include "patchlevel.h"           // PY_MAJOR_VERSION
 #include <windows.h>
 
-#ifdef _DEBUG
-#define PYD_DEBUG_SUFFIX "_d"
-#else
-#define PYD_DEBUG_SUFFIX ""
-#endif
-
-#ifdef Py_NOGIL
-#  define PYD_THREADING_TAG "t"
-#else
-#  define PYD_THREADING_TAG ""
-#endif
-
-#ifdef PYD_PLATFORM_TAG
-#define PYD_TAGGED_SUFFIX PYD_DEBUG_SUFFIX ".cp" Py_STRINGIFY(PY_MAJOR_VERSION) Py_STRINGIFY(PY_MINOR_VERSION) PYD_THREADING_TAG "-" PYD_PLATFORM_TAG ".pyd"
-#else
-#define PYD_TAGGED_SUFFIX PYD_DEBUG_SUFFIX ".cp" Py_STRINGIFY(PY_MAJOR_VERSION) Py_STRINGIFY(PY_MINOR_VERSION) PYD_THREADING_TAG ".pyd"
-#endif
-
-#define PYD_UNTAGGED_SUFFIX PYD_DEBUG_SUFFIX ".pyd"
-
 const char *_PyImport_DynLoadFiletab[] = {
     PYD_TAGGED_SUFFIX,
     PYD_UNTAGGED_SUFFIX,
diff --git a/Python/executor.c b/Python/executor.c
index ac9104223da8ff..6da3af08822fd0 100644
--- a/Python/executor.c
+++ b/Python/executor.c
@@ -2,6 +2,7 @@
 
 #include "opcode.h"
 
+#include "pycore_bitutils.h"
 #include "pycore_call.h"
 #include "pycore_ceval.h"
 #include "pycore_dict.h"
@@ -30,6 +31,16 @@
         goto deoptimize;         \
     }
 
+#ifdef Py_STATS
+// Disable these macros that apply to Tier 1 stats when we are in Tier 2
+#undef STAT_INC
+#define STAT_INC(opname, name) ((void)0)
+#undef STAT_DEC
+#define STAT_DEC(opname, name) ((void)0)
+#undef CALL_STAT_INC
+#define CALL_STAT_INC(name) ((void)0)
+#endif
+
 #undef ENABLE_SPECIALIZATION
 #define ENABLE_SPECIALIZATION 0
 
@@ -62,12 +73,15 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
 
     CHECK_EVAL_BREAKER();
 
-    OBJECT_STAT_INC(optimization_traces_executed);
+    OPT_STAT_INC(traces_executed);
     _Py_CODEUNIT *ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
     int pc = 0;
     int opcode;
     int oparg;
     uint64_t operand;
+#ifdef Py_STATS
+    uint64_t trace_uop_execution_counter = 0;
+#endif
 
     for (;;) {
         opcode = self->trace[pc].opcode;
@@ -81,7 +95,12 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
                 operand,
                 (int)(stack_pointer - _PyFrame_Stackbase(frame)));
         pc++;
-        OBJECT_STAT_INC(optimization_uops_executed);
+        OPT_STAT_INC(uops_executed);
+        UOP_EXE_INC(opcode);
+#ifdef Py_STATS
+        trace_uop_execution_counter++;
+#endif
+
         switch (opcode) {
 
 #include "executor_cases.c.h"
@@ -114,6 +133,7 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
     // On ERROR_IF we return NULL as the frame.
     // The caller recovers the frame from tstate->current_frame.
     DPRINTF(2, "Error: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
+    OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
     _PyFrame_SetStackPointer(frame, stack_pointer);
     Py_DECREF(self);
     return NULL;
@@ -122,6 +142,7 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
     // On DEOPT_IF we just repeat the last instruction.
     // This presumes nothing was popped from the stack (nor pushed).
     DPRINTF(2, "DEOPT: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
+    OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
     frame->prev_instr--;  // Back up to just before destination
     _PyFrame_SetStackPointer(frame, stack_pointer);
     Py_DECREF(self);
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index 55a03c9a23a572..e2f4f9805b79fa 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -12,10 +12,10 @@
             DEOPT_IF(_Py_emscripten_signal_clock == 0, RESUME);
             _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING;
 #endif
-            /* Possibly combine these two checks */
-            DEOPT_IF(_PyFrame_GetCode(frame)->_co_instrumentation_version
-                != tstate->interp->monitoring_version, RESUME);
-            DEOPT_IF(_Py_atomic_load_relaxed_int32(&tstate->interp->ceval.eval_breaker), RESUME);
+            uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker);
+            uintptr_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
+            assert((version & _PY_EVAL_EVENTS_MASK) == 0);
+            DEOPT_IF(eval_breaker != version, RESUME);
             break;
         }
 
@@ -236,8 +236,8 @@
             PyObject *left;
             right = stack_pointer[-1];
             left = stack_pointer[-2];
-            DEOPT_IF(!PyLong_CheckExact(left), BINARY_OP);
-            DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP);
+            DEOPT_IF(!PyLong_CheckExact(left), _GUARD_BOTH_INT);
+            DEOPT_IF(!PyLong_CheckExact(right), _GUARD_BOTH_INT);
             break;
         }
 
@@ -294,8 +294,8 @@
             PyObject *left;
             right = stack_pointer[-1];
             left = stack_pointer[-2];
-            DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP);
-            DEOPT_IF(!PyFloat_CheckExact(right), BINARY_OP);
+            DEOPT_IF(!PyFloat_CheckExact(left), _GUARD_BOTH_FLOAT);
+            DEOPT_IF(!PyFloat_CheckExact(right), _GUARD_BOTH_FLOAT);
             break;
         }
 
@@ -352,8 +352,8 @@
             PyObject *left;
             right = stack_pointer[-1];
             left = stack_pointer[-2];
-            DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_OP);
-            DEOPT_IF(!PyUnicode_CheckExact(right), BINARY_OP);
+            DEOPT_IF(!PyUnicode_CheckExact(left), _GUARD_BOTH_UNICODE);
+            DEOPT_IF(!PyUnicode_CheckExact(right), _GUARD_BOTH_UNICODE);
             break;
         }
 
@@ -1189,8 +1189,8 @@
         case _GUARD_GLOBALS_VERSION: {
             uint16_t version = (uint16_t)operand;
             PyDictObject *dict = (PyDictObject *)GLOBALS();
-            DEOPT_IF(!PyDict_CheckExact(dict), LOAD_GLOBAL);
-            DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL);
+            DEOPT_IF(!PyDict_CheckExact(dict), _GUARD_GLOBALS_VERSION);
+            DEOPT_IF(dict->ma_keys->dk_version != version, _GUARD_GLOBALS_VERSION);
             assert(DK_IS_UNICODE(dict->ma_keys));
             break;
         }
@@ -1198,8 +1198,8 @@
         case _GUARD_BUILTINS_VERSION: {
             uint16_t version = (uint16_t)operand;
             PyDictObject *dict = (PyDictObject *)BUILTINS();
-            DEOPT_IF(!PyDict_CheckExact(dict), LOAD_GLOBAL);
-            DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL);
+            DEOPT_IF(!PyDict_CheckExact(dict), _GUARD_BUILTINS_VERSION);
+            DEOPT_IF(dict->ma_keys->dk_version != version, _GUARD_BUILTINS_VERSION);
             assert(DK_IS_UNICODE(dict->ma_keys));
             break;
         }
@@ -1211,7 +1211,7 @@
             PyDictObject *dict = (PyDictObject *)GLOBALS();
             PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys);
             res = entries[index].me_value;
-            DEOPT_IF(res == NULL, LOAD_GLOBAL);
+            DEOPT_IF(res == NULL, _LOAD_GLOBAL_MODULE);
             Py_INCREF(res);
             STAT_INC(LOAD_GLOBAL, hit);
             null = NULL;
@@ -1229,7 +1229,7 @@
             PyDictObject *bdict = (PyDictObject *)BUILTINS();
             PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(bdict->ma_keys);
             res = entries[index].me_value;
-            DEOPT_IF(res == NULL, LOAD_GLOBAL);
+            DEOPT_IF(res == NULL, _LOAD_GLOBAL_BUILTINS);
             Py_INCREF(res);
             STAT_INC(LOAD_GLOBAL, hit);
             null = NULL;
@@ -1679,7 +1679,7 @@
             uint32_t type_version = (uint32_t)operand;
             PyTypeObject *tp = Py_TYPE(owner);
             assert(type_version != 0);
-            DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR);
+            DEOPT_IF(tp->tp_version_tag != type_version, _GUARD_TYPE_VERSION);
             break;
         }
 
@@ -1689,9 +1689,7 @@
             assert(Py_TYPE(owner)->tp_dictoffset < 0);
             assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
             PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner);
-            DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) &&
-                     !_PyObject_MakeInstanceAttributesFromDict(owner, dorv),
-                     LOAD_ATTR);
+            DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) && !_PyObject_MakeInstanceAttributesFromDict(owner, dorv), _CHECK_MANAGED_OBJECT_HAS_VALUES);
             break;
         }
 
@@ -1703,7 +1701,83 @@
             uint16_t index = (uint16_t)operand;
             PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
             attr = _PyDictOrValues_GetValues(dorv)->values[index];
-            DEOPT_IF(attr == NULL, LOAD_ATTR);
+            DEOPT_IF(attr == NULL, _LOAD_ATTR_INSTANCE_VALUE);
+            STAT_INC(LOAD_ATTR, hit);
+            Py_INCREF(attr);
+            null = NULL;
+            Py_DECREF(owner);
+            STACK_GROW(((oparg & 1) ? 1 : 0));
+            stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = attr;
+            if (oparg & 1) { stack_pointer[-(oparg & 1 ? 1 : 0)] = null; }
+            break;
+        }
+
+        case _CHECK_ATTR_MODULE: {
+            PyObject *owner;
+            owner = stack_pointer[-1];
+            uint32_t type_version = (uint32_t)operand;
+            DEOPT_IF(!PyModule_CheckExact(owner), _CHECK_ATTR_MODULE);
+            PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner)->md_dict;
+            assert(dict != NULL);
+            DEOPT_IF(dict->ma_keys->dk_version != type_version, _CHECK_ATTR_MODULE);
+            break;
+        }
+
+        case _LOAD_ATTR_MODULE: {
+            PyObject *owner;
+            PyObject *attr;
+            PyObject *null = NULL;
+            owner = stack_pointer[-1];
+            uint16_t index = (uint16_t)operand;
+            PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner)->md_dict;
+            assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE);
+            assert(index < dict->ma_keys->dk_nentries);
+            PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + index;
+            attr = ep->me_value;
+            DEOPT_IF(attr == NULL, _LOAD_ATTR_MODULE);
+            STAT_INC(LOAD_ATTR, hit);
+            Py_INCREF(attr);
+            null = NULL;
+            Py_DECREF(owner);
+            STACK_GROW(((oparg & 1) ? 1 : 0));
+            stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = attr;
+            if (oparg & 1) { stack_pointer[-(oparg & 1 ? 1 : 0)] = null; }
+            break;
+        }
+
+        case _CHECK_ATTR_WITH_HINT: {
+            PyObject *owner;
+            owner = stack_pointer[-1];
+            assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
+            PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
+            DEOPT_IF(_PyDictOrValues_IsValues(dorv), _CHECK_ATTR_WITH_HINT);
+            PyDictObject *dict = (PyDictObject *)_PyDictOrValues_GetDict(dorv);
+            DEOPT_IF(dict == NULL, _CHECK_ATTR_WITH_HINT);
+            assert(PyDict_CheckExact((PyObject *)dict));
+            break;
+        }
+
+        case _LOAD_ATTR_WITH_HINT: {
+            PyObject *owner;
+            PyObject *attr;
+            PyObject *null = NULL;
+            owner = stack_pointer[-1];
+            uint16_t hint = (uint16_t)operand;
+            PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
+            PyDictObject *dict = (PyDictObject *)_PyDictOrValues_GetDict(dorv);
+            DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries, _LOAD_ATTR_WITH_HINT);
+            PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1);
+            if (DK_IS_UNICODE(dict->ma_keys)) {
+                PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint;
+                DEOPT_IF(ep->me_key != name, _LOAD_ATTR_WITH_HINT);
+                attr = ep->me_value;
+            }
+            else {
+                PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + hint;
+                DEOPT_IF(ep->me_key != name, _LOAD_ATTR_WITH_HINT);
+                attr = ep->me_value;
+            }
+            DEOPT_IF(attr == NULL, _LOAD_ATTR_WITH_HINT);
             STAT_INC(LOAD_ATTR, hit);
             Py_INCREF(attr);
             null = NULL;
@@ -1722,7 +1796,7 @@
             uint16_t index = (uint16_t)operand;
             char *addr = (char *)owner + index;
             attr = *(PyObject **)addr;
-            DEOPT_IF(attr == NULL, LOAD_ATTR);
+            DEOPT_IF(attr == NULL, _LOAD_ATTR_SLOT);
             STAT_INC(LOAD_ATTR, hit);
             Py_INCREF(attr);
             null = NULL;
@@ -1733,12 +1807,39 @@
             break;
         }
 
+        case _CHECK_ATTR_CLASS: {
+            PyObject *owner;
+            owner = stack_pointer[-1];
+            uint32_t type_version = (uint32_t)operand;
+            DEOPT_IF(!PyType_Check(owner), _CHECK_ATTR_CLASS);
+            assert(type_version != 0);
+            DEOPT_IF(((PyTypeObject *)owner)->tp_version_tag != type_version, _CHECK_ATTR_CLASS);
+            break;
+        }
+
+        case _LOAD_ATTR_CLASS: {
+            PyObject *owner;
+            PyObject *attr;
+            PyObject *null = NULL;
+            owner = stack_pointer[-1];
+            PyObject *descr = (PyObject *)operand;
+            STAT_INC(LOAD_ATTR, hit);
+            assert(descr != NULL);
+            attr = Py_NewRef(descr);
+            null = NULL;
+            Py_DECREF(owner);
+            STACK_GROW(((oparg & 1) ? 1 : 0));
+            stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = attr;
+            if (oparg & 1) { stack_pointer[-(oparg & 1 ? 1 : 0)] = null; }
+            break;
+        }
+
         case _GUARD_DORV_VALUES: {
             PyObject *owner;
             owner = stack_pointer[-1];
             assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
             PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
-            DEOPT_IF(!_PyDictOrValues_IsValues(dorv), STORE_ATTR);
+            DEOPT_IF(!_PyDictOrValues_IsValues(dorv), _GUARD_DORV_VALUES);
             break;
         }
 
@@ -1764,16 +1865,6 @@
             break;
         }
 
-        case _GUARD_TYPE_VERSION_STORE: {
-            PyObject *owner;
-            owner = stack_pointer[-1];
-            uint32_t type_version = (uint32_t)operand;
-            PyTypeObject *tp = Py_TYPE(owner);
-            assert(type_version != 0);
-            DEOPT_IF(tp->tp_version_tag != type_version, STORE_ATTR);
-            break;
-        }
-
         case _STORE_ATTR_SLOT: {
             PyObject *owner;
             PyObject *value;
@@ -2114,7 +2205,7 @@
         case _ITER_CHECK_LIST: {
             PyObject *iter;
             iter = stack_pointer[-1];
-            DEOPT_IF(Py_TYPE(iter) != &PyListIter_Type, FOR_ITER);
+            DEOPT_IF(Py_TYPE(iter) != &PyListIter_Type, _ITER_CHECK_LIST);
             break;
         }
 
@@ -2159,7 +2250,7 @@
         case _ITER_CHECK_TUPLE: {
             PyObject *iter;
             iter = stack_pointer[-1];
-            DEOPT_IF(Py_TYPE(iter) != &PyTupleIter_Type, FOR_ITER);
+            DEOPT_IF(Py_TYPE(iter) != &PyTupleIter_Type, _ITER_CHECK_TUPLE);
             break;
         }
 
@@ -2205,7 +2296,7 @@
             PyObject *iter;
             iter = stack_pointer[-1];
             _PyRangeIterObject *r = (_PyRangeIterObject *)iter;
-            DEOPT_IF(Py_TYPE(r) != &PyRangeIter_Type, FOR_ITER);
+            DEOPT_IF(Py_TYPE(r) != &PyRangeIter_Type, _ITER_CHECK_RANGE);
             break;
         }
 
@@ -2300,9 +2391,7 @@
             owner = stack_pointer[-1];
             assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
             PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner);
-            DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) &&
-                     !_PyObject_MakeInstanceAttributesFromDict(owner, dorv),
-                     LOAD_ATTR);
+            DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) && !_PyObject_MakeInstanceAttributesFromDict(owner, dorv), _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT);
             break;
         }
 
@@ -2312,8 +2401,7 @@
             uint32_t keys_version = (uint32_t)operand;
             PyTypeObject *owner_cls = Py_TYPE(owner);
             PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls;
-            DEOPT_IF(owner_heap_type->ht_cached_keys->dk_version !=
-                     keys_version, LOAD_ATTR);
+            DEOPT_IF(owner_heap_type->ht_cached_keys->dk_version != keys_version, _GUARD_KEYS_VERSION);
             break;
         }
 
@@ -2355,13 +2443,71 @@
             break;
         }
 
+        case _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: {
+            PyObject *owner;
+            PyObject *attr;
+            owner = stack_pointer[-1];
+            PyObject *descr = (PyObject *)operand;
+            assert((oparg & 1) == 0);
+            STAT_INC(LOAD_ATTR, hit);
+            assert(descr != NULL);
+            Py_DECREF(owner);
+            attr = Py_NewRef(descr);
+            stack_pointer[-1] = attr;
+            break;
+        }
+
+        case _LOAD_ATTR_NONDESCRIPTOR_NO_DICT: {
+            PyObject *owner;
+            PyObject *attr;
+            owner = stack_pointer[-1];
+            PyObject *descr = (PyObject *)operand;
+            assert((oparg & 1) == 0);
+            assert(Py_TYPE(owner)->tp_dictoffset == 0);
+            STAT_INC(LOAD_ATTR, hit);
+            assert(descr != NULL);
+            Py_DECREF(owner);
+            attr = Py_NewRef(descr);
+            stack_pointer[-1] = attr;
+            break;
+        }
+
+        case _CHECK_ATTR_METHOD_LAZY_DICT: {
+            PyObject *owner;
+            owner = stack_pointer[-1];
+            Py_ssize_t dictoffset = Py_TYPE(owner)->tp_dictoffset;
+            assert(dictoffset > 0);
+            PyObject *dict = *(PyObject **)((char *)owner + dictoffset);
+            /* This object has a __dict__, just not yet created */
+            DEOPT_IF(dict != NULL, _CHECK_ATTR_METHOD_LAZY_DICT);
+            break;
+        }
+
+        case _LOAD_ATTR_METHOD_LAZY_DICT: {
+            PyObject *owner;
+            PyObject *attr;
+            PyObject *self;
+            owner = stack_pointer[-1];
+            PyObject *descr = (PyObject *)operand;
+            assert(oparg & 1);
+            STAT_INC(LOAD_ATTR, hit);
+            assert(descr != NULL);
+            assert(_PyType_HasFeature(Py_TYPE(descr), Py_TPFLAGS_METHOD_DESCRIPTOR));
+            attr = Py_NewRef(descr);
+            self = owner;
+            STACK_GROW(1);
+            stack_pointer[-2] = attr;
+            stack_pointer[-1] = self;
+            break;
+        }
+
         case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: {
             PyObject *null;
             PyObject *callable;
             null = stack_pointer[-1 - oparg];
             callable = stack_pointer[-2 - oparg];
-            DEOPT_IF(null != NULL, CALL);
-            DEOPT_IF(Py_TYPE(callable) != &PyMethod_Type, CALL);
+            DEOPT_IF(null != NULL, _CHECK_CALL_BOUND_METHOD_EXACT_ARGS);
+            DEOPT_IF(Py_TYPE(callable) != &PyMethod_Type, _CHECK_CALL_BOUND_METHOD_EXACT_ARGS);
             break;
         }
 
@@ -2382,7 +2528,7 @@
         }
 
         case _CHECK_PEP_523: {
-            DEOPT_IF(tstate->interp->eval_frame, CALL);
+            DEOPT_IF(tstate->interp->eval_frame, _CHECK_PEP_523);
             break;
         }
 
@@ -2392,11 +2538,11 @@
             self_or_null = stack_pointer[-1 - oparg];
             callable = stack_pointer[-2 - oparg];
             uint32_t func_version = (uint32_t)operand;
-            DEOPT_IF(!PyFunction_Check(callable), CALL);
+            DEOPT_IF(!PyFunction_Check(callable), _CHECK_FUNCTION_EXACT_ARGS);
             PyFunctionObject *func = (PyFunctionObject *)callable;
-            DEOPT_IF(func->func_version != func_version, CALL);
+            DEOPT_IF(func->func_version != func_version, _CHECK_FUNCTION_EXACT_ARGS);
             PyCodeObject *code = (PyCodeObject *)func->func_code;
-            DEOPT_IF(code->co_argcount != oparg + (self_or_null != NULL), CALL);
+            DEOPT_IF(code->co_argcount != oparg + (self_or_null != NULL), _CHECK_FUNCTION_EXACT_ARGS);
             break;
         }
 
@@ -2405,8 +2551,8 @@
             callable = stack_pointer[-2 - oparg];
             PyFunctionObject *func = (PyFunctionObject *)callable;
             PyCodeObject *code = (PyCodeObject *)func->func_code;
-            DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL);
-            DEOPT_IF(tstate->py_recursion_remaining <= 1, CALL);
+            DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), _CHECK_STACK_SPACE);
+            DEOPT_IF(tstate->py_recursion_remaining <= 1, _CHECK_STACK_SPACE);
             break;
         }
 
@@ -2671,8 +2817,7 @@
                 total_args++;
             }
             DEOPT_IF(!PyCFunction_CheckExact(callable), CALL);
-            DEOPT_IF(PyCFunction_GET_FLAGS(callable) !=
-                (METH_FASTCALL | METH_KEYWORDS), CALL);
+            DEOPT_IF(PyCFunction_GET_FLAGS(callable) != (METH_FASTCALL | METH_KEYWORDS), CALL);
             STAT_INC(CALL, hit);
             /* res = func(self, args, nargs, kwnames) */
             _PyCFunctionFastWithKeywords cfunc =
@@ -3144,6 +3289,7 @@
             frame->prev_instr--;  // Back up to just before destination
             _PyFrame_SetStackPointer(frame, stack_pointer);
             Py_DECREF(self);
+            OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
             return frame;
             break;
         }
diff --git a/Python/fileutils.c b/Python/fileutils.c
index 9bc1de2db84006..17a4ae56ef0528 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -2,8 +2,11 @@
 #include "pycore_fileutils.h"     // fileutils definitions
 #include "pycore_runtime.h"       // _PyRuntime
 #include "osdefs.h"               // SEP
-#include <locale.h>
+
 #include <stdlib.h>               // mbstowcs()
+#ifdef HAVE_UNISTD_H
+#  include <unistd.h>             // getcwd()
+#endif
 
 #ifdef MS_WINDOWS
 #  include <malloc.h>
@@ -19,7 +22,7 @@ extern int winerror_to_errno(int);
 #endif
 
 #ifdef HAVE_LANGINFO_H
-#include <langinfo.h>
+#  include <langinfo.h>           // nl_langinfo(CODESET)
 #endif
 
 #ifdef HAVE_SYS_IOCTL_H
@@ -27,12 +30,12 @@ extern int winerror_to_errno(int);
 #endif
 
 #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
-#include <iconv.h>
+#  include <iconv.h>              // iconv_open()
 #endif
 
 #ifdef HAVE_FCNTL_H
-#include <fcntl.h>
-#endif /* HAVE_FCNTL_H */
+#  include <fcntl.h>              // fcntl(F_GETFD)
+#endif
 
 #ifdef O_CLOEXEC
 /* Does open() support the O_CLOEXEC flag? Possible values:
diff --git a/Python/frozenmain.c b/Python/frozenmain.c
index 767f9804903a9e..3ce9476c9ad46c 100644
--- a/Python/frozenmain.c
+++ b/Python/frozenmain.c
@@ -3,7 +3,11 @@
 #include "Python.h"
 #include "pycore_pystate.h"       // _Py_GetConfig()
 #include "pycore_runtime.h"       // _PyRuntime_Initialize()
-#include <locale.h>
+
+#ifdef HAVE_UNISTD_H
+#  include <unistd.h>             // isatty()
+#endif
+
 
 #ifdef MS_WINDOWS
 extern void PyWinFreeze_ExeInit(void);
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index 2701d416648a20..eac136846b169f 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -12,7 +12,12 @@
             static_assert(0 == 0, "incorrect cache size");
             TIER_ONE_ONLY
             assert(frame == tstate->current_frame);
-            if (_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version) {
+            uintptr_t global_version =
+                _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) &
+                ~_PY_EVAL_EVENTS_MASK;
+            uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
+            assert((code_version & 255) == 0);
+            if (code_version != global_version) {
                 int err = _Py_Instrument(_PyFrame_GetCode(frame), tstate->interp);
                 if (err) goto error;
                 next_instr--;
@@ -31,19 +36,17 @@
             DEOPT_IF(_Py_emscripten_signal_clock == 0, RESUME);
             _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING;
 #endif
-            /* Possibly combine these two checks */
-            DEOPT_IF(_PyFrame_GetCode(frame)->_co_instrumentation_version
-                != tstate->interp->monitoring_version, RESUME);
-            DEOPT_IF(_Py_atomic_load_relaxed_int32(&tstate->interp->ceval.eval_breaker), RESUME);
+            uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker);
+            uintptr_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
+            assert((version & _PY_EVAL_EVENTS_MASK) == 0);
+            DEOPT_IF(eval_breaker != version, RESUME);
             DISPATCH();
         }
 
         TARGET(INSTRUMENTED_RESUME) {
-            /* Possible performance enhancement:
-             *   We need to check the eval breaker anyway, can we
-             * combine the instrument verison check and the eval breaker test?
-             */
-            if (_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version) {
+            uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & ~_PY_EVAL_EVENTS_MASK;
+            uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
+            if (code_version != global_version) {
                 if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) {
                     goto error;
                 }
@@ -1294,8 +1297,7 @@
             receiver = stack_pointer[-2];
             DEOPT_IF(tstate->interp->eval_frame, SEND);
             PyGenObject *gen = (PyGenObject *)receiver;
-            DEOPT_IF(Py_TYPE(gen) != &PyGen_Type &&
-                     Py_TYPE(gen) != &PyCoro_Type, SEND);
+            DEOPT_IF(Py_TYPE(gen) != &PyGen_Type && Py_TYPE(gen) != &PyCoro_Type, SEND);
             DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, SEND);
             STAT_INC(SEND, hit);
             _PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
@@ -2410,9 +2412,7 @@
                 assert(Py_TYPE(owner)->tp_dictoffset < 0);
                 assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
                 PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner);
-                DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) &&
-                         !_PyObject_MakeInstanceAttributesFromDict(owner, dorv),
-                         LOAD_ATTR);
+                DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) && !_PyObject_MakeInstanceAttributesFromDict(owner, dorv), LOAD_ATTR);
             }
             // _LOAD_ATTR_INSTANCE_VALUE
             {
@@ -2436,22 +2436,29 @@
             PyObject *owner;
             PyObject *attr;
             PyObject *null = NULL;
+            // _CHECK_ATTR_MODULE
             owner = stack_pointer[-1];
-            uint32_t type_version = read_u32(&next_instr[1].cache);
-            uint16_t index = read_u16(&next_instr[3].cache);
-            DEOPT_IF(!PyModule_CheckExact(owner), LOAD_ATTR);
-            PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner)->md_dict;
-            assert(dict != NULL);
-            DEOPT_IF(dict->ma_keys->dk_version != type_version, LOAD_ATTR);
-            assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE);
-            assert(index < dict->ma_keys->dk_nentries);
-            PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + index;
-            attr = ep->me_value;
-            DEOPT_IF(attr == NULL, LOAD_ATTR);
-            STAT_INC(LOAD_ATTR, hit);
-            Py_INCREF(attr);
-            null = NULL;
-            Py_DECREF(owner);
+            {
+                uint32_t type_version = read_u32(&next_instr[1].cache);
+                DEOPT_IF(!PyModule_CheckExact(owner), LOAD_ATTR);
+                PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner)->md_dict;
+                assert(dict != NULL);
+                DEOPT_IF(dict->ma_keys->dk_version != type_version, LOAD_ATTR);
+            }
+            // _LOAD_ATTR_MODULE
+            {
+                uint16_t index = read_u16(&next_instr[3].cache);
+                PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner)->md_dict;
+                assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE);
+                assert(index < dict->ma_keys->dk_nentries);
+                PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + index;
+                attr = ep->me_value;
+                DEOPT_IF(attr == NULL, LOAD_ATTR);
+                STAT_INC(LOAD_ATTR, hit);
+                Py_INCREF(attr);
+                null = NULL;
+                Py_DECREF(owner);
+            }
             STACK_GROW(((oparg & 1) ? 1 : 0));
             stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = attr;
             if (oparg & 1) { stack_pointer[-(oparg & 1 ? 1 : 0)] = null; }
@@ -2463,36 +2470,46 @@
             PyObject *owner;
             PyObject *attr;
             PyObject *null = NULL;
+            // _GUARD_TYPE_VERSION
             owner = stack_pointer[-1];
-            uint32_t type_version = read_u32(&next_instr[1].cache);
-            uint16_t index = read_u16(&next_instr[3].cache);
-            PyTypeObject *tp = Py_TYPE(owner);
-            assert(type_version != 0);
-            DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR);
-            assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT);
-            PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
-            DEOPT_IF(_PyDictOrValues_IsValues(dorv), LOAD_ATTR);
-            PyDictObject *dict = (PyDictObject *)_PyDictOrValues_GetDict(dorv);
-            DEOPT_IF(dict == NULL, LOAD_ATTR);
-            assert(PyDict_CheckExact((PyObject *)dict));
-            PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1);
-            uint16_t hint = index;
-            DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries, LOAD_ATTR);
-            if (DK_IS_UNICODE(dict->ma_keys)) {
-                PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint;
-                DEOPT_IF(ep->me_key != name, LOAD_ATTR);
-                attr = ep->me_value;
+            {
+                uint32_t type_version = read_u32(&next_instr[1].cache);
+                PyTypeObject *tp = Py_TYPE(owner);
+                assert(type_version != 0);
+                DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR);
             }
-            else {
-                PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + hint;
-                DEOPT_IF(ep->me_key != name, LOAD_ATTR);
-                attr = ep->me_value;
+            // _CHECK_ATTR_WITH_HINT
+            {
+                assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
+                PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
+                DEOPT_IF(_PyDictOrValues_IsValues(dorv), LOAD_ATTR);
+                PyDictObject *dict = (PyDictObject *)_PyDictOrValues_GetDict(dorv);
+                DEOPT_IF(dict == NULL, LOAD_ATTR);
+                assert(PyDict_CheckExact((PyObject *)dict));
+            }
+            // _LOAD_ATTR_WITH_HINT
+            {
+                uint16_t hint = read_u16(&next_instr[3].cache);
+                PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
+                PyDictObject *dict = (PyDictObject *)_PyDictOrValues_GetDict(dorv);
+                DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries, LOAD_ATTR);
+                PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1);
+                if (DK_IS_UNICODE(dict->ma_keys)) {
+                    PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint;
+                    DEOPT_IF(ep->me_key != name, LOAD_ATTR);
+                    attr = ep->me_value;
+                }
+                else {
+                    PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + hint;
+                    DEOPT_IF(ep->me_key != name, LOAD_ATTR);
+                    attr = ep->me_value;
+                }
+                DEOPT_IF(attr == NULL, LOAD_ATTR);
+                STAT_INC(LOAD_ATTR, hit);
+                Py_INCREF(attr);
+                null = NULL;
+                Py_DECREF(owner);
             }
-            DEOPT_IF(attr == NULL, LOAD_ATTR);
-            STAT_INC(LOAD_ATTR, hit);
-            Py_INCREF(attr);
-            null = NULL;
-            Py_DECREF(owner);
             STACK_GROW(((oparg & 1) ? 1 : 0));
             stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = attr;
             if (oparg & 1) { stack_pointer[-(oparg & 1 ? 1 : 0)] = null; }
@@ -2534,21 +2551,23 @@
             PyObject *owner;
             PyObject *attr;
             PyObject *null = NULL;
+            // _CHECK_ATTR_CLASS
             owner = stack_pointer[-1];
-            uint32_t type_version = read_u32(&next_instr[1].cache);
-            PyObject *descr = read_obj(&next_instr[5].cache);
-
-            DEOPT_IF(!PyType_Check(owner), LOAD_ATTR);
-            DEOPT_IF(((PyTypeObject *)owner)->tp_version_tag != type_version,
-                LOAD_ATTR);
-            assert(type_version != 0);
-
-            STAT_INC(LOAD_ATTR, hit);
-            null = NULL;
-            attr = descr;
-            assert(attr != NULL);
-            Py_INCREF(attr);
-            Py_DECREF(owner);
+            {
+                uint32_t type_version = read_u32(&next_instr[1].cache);
+                DEOPT_IF(!PyType_Check(owner), LOAD_ATTR);
+                assert(type_version != 0);
+                DEOPT_IF(((PyTypeObject *)owner)->tp_version_tag != type_version, LOAD_ATTR);
+            }
+            // _LOAD_ATTR_CLASS
+            {
+                PyObject *descr = read_obj(&next_instr[5].cache);
+                STAT_INC(LOAD_ATTR, hit);
+                assert(descr != NULL);
+                attr = Py_NewRef(descr);
+                null = NULL;
+                Py_DECREF(owner);
+            }
             STACK_GROW(((oparg & 1) ? 1 : 0));
             stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = attr;
             if (oparg & 1) { stack_pointer[-(oparg & 1 ? 1 : 0)] = null; }
@@ -2621,7 +2640,7 @@
         TARGET(STORE_ATTR_INSTANCE_VALUE) {
             PyObject *owner;
             PyObject *value;
-            // _GUARD_TYPE_VERSION_STORE
+            // _GUARD_TYPE_VERSION
             owner = stack_pointer[-1];
             {
                 uint32_t type_version = read_u32(&next_instr[1].cache);
@@ -2710,7 +2729,7 @@
         TARGET(STORE_ATTR_SLOT) {
             PyObject *owner;
             PyObject *value;
-            // _GUARD_TYPE_VERSION_STORE
+            // _GUARD_TYPE_VERSION
             owner = stack_pointer[-1];
             {
                 uint32_t type_version = read_u32(&next_instr[1].cache);
@@ -2969,7 +2988,7 @@
                 // Double-check that the opcode isn't instrumented or something:
                 here->op.code == JUMP_BACKWARD)
             {
-                OBJECT_STAT_INC(optimization_attempts);
+                OPT_STAT_INC(attempts);
                 int optimized = _PyOptimizer_BackEdge(frame, here, next_instr, stack_pointer);
                 if (optimized < 0) goto error;
                 if (optimized) {
@@ -3591,17 +3610,14 @@
             {
                 assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
                 PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner);
-                DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) &&
-                         !_PyObject_MakeInstanceAttributesFromDict(owner, dorv),
-                         LOAD_ATTR);
+                DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) && !_PyObject_MakeInstanceAttributesFromDict(owner, dorv), LOAD_ATTR);
             }
             // _GUARD_KEYS_VERSION
             {
                 uint32_t keys_version = read_u32(&next_instr[3].cache);
                 PyTypeObject *owner_cls = Py_TYPE(owner);
                 PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls;
-                DEOPT_IF(owner_heap_type->ht_cached_keys->dk_version !=
-                         keys_version, LOAD_ATTR);
+                DEOPT_IF(owner_heap_type->ht_cached_keys->dk_version != keys_version, LOAD_ATTR);
             }
             // _LOAD_ATTR_METHOD_WITH_VALUES
             {
@@ -3654,26 +3670,36 @@
         TARGET(LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES) {
             PyObject *owner;
             PyObject *attr;
+            // _GUARD_TYPE_VERSION
             owner = stack_pointer[-1];
-            uint32_t type_version = read_u32(&next_instr[1].cache);
-            uint32_t keys_version = read_u32(&next_instr[3].cache);
-            PyObject *descr = read_obj(&next_instr[5].cache);
-            assert((oparg & 1) == 0);
-            PyTypeObject *owner_cls = Py_TYPE(owner);
-            assert(type_version != 0);
-            DEOPT_IF(owner_cls->tp_version_tag != type_version, LOAD_ATTR);
-            assert(owner_cls->tp_flags & Py_TPFLAGS_MANAGED_DICT);
-            PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner);
-            DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) &&
-                     !_PyObject_MakeInstanceAttributesFromDict(owner, dorv),
-                     LOAD_ATTR);
-            PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls;
-            DEOPT_IF(owner_heap_type->ht_cached_keys->dk_version !=
-                     keys_version, LOAD_ATTR);
-            STAT_INC(LOAD_ATTR, hit);
-            assert(descr != NULL);
-            Py_DECREF(owner);
-            attr = Py_NewRef(descr);
+            {
+                uint32_t type_version = read_u32(&next_instr[1].cache);
+                PyTypeObject *tp = Py_TYPE(owner);
+                assert(type_version != 0);
+                DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR);
+            }
+            // _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT
+            {
+                assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
+                PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner);
+                DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) && !_PyObject_MakeInstanceAttributesFromDict(owner, dorv), LOAD_ATTR);
+            }
+            // _GUARD_KEYS_VERSION
+            {
+                uint32_t keys_version = read_u32(&next_instr[3].cache);
+                PyTypeObject *owner_cls = Py_TYPE(owner);
+                PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls;
+                DEOPT_IF(owner_heap_type->ht_cached_keys->dk_version != keys_version, LOAD_ATTR);
+            }
+            // _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES
+            {
+                PyObject *descr = read_obj(&next_instr[5].cache);
+                assert((oparg & 1) == 0);
+                STAT_INC(LOAD_ATTR, hit);
+                assert(descr != NULL);
+                Py_DECREF(owner);
+                attr = Py_NewRef(descr);
+            }
             stack_pointer[-1] = attr;
             next_instr += 9;
             DISPATCH();
@@ -3682,18 +3708,24 @@
         TARGET(LOAD_ATTR_NONDESCRIPTOR_NO_DICT) {
             PyObject *owner;
             PyObject *attr;
+            // _GUARD_TYPE_VERSION
             owner = stack_pointer[-1];
-            uint32_t type_version = read_u32(&next_instr[1].cache);
-            PyObject *descr = read_obj(&next_instr[5].cache);
-            assert((oparg & 1) == 0);
-            PyTypeObject *owner_cls = Py_TYPE(owner);
-            assert(type_version != 0);
-            DEOPT_IF(owner_cls->tp_version_tag != type_version, LOAD_ATTR);
-            assert(owner_cls->tp_dictoffset == 0);
-            STAT_INC(LOAD_ATTR, hit);
-            assert(descr != NULL);
-            Py_DECREF(owner);
-            attr = Py_NewRef(descr);
+            {
+                uint32_t type_version = read_u32(&next_instr[1].cache);
+                PyTypeObject *tp = Py_TYPE(owner);
+                assert(type_version != 0);
+                DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR);
+            }
+            // _LOAD_ATTR_NONDESCRIPTOR_NO_DICT
+            {
+                PyObject *descr = read_obj(&next_instr[5].cache);
+                assert((oparg & 1) == 0);
+                assert(Py_TYPE(owner)->tp_dictoffset == 0);
+                STAT_INC(LOAD_ATTR, hit);
+                assert(descr != NULL);
+                Py_DECREF(owner);
+                attr = Py_NewRef(descr);
+            }
             stack_pointer[-1] = attr;
             next_instr += 9;
             DISPATCH();
@@ -3703,22 +3735,32 @@
             PyObject *owner;
             PyObject *attr;
             PyObject *self;
+            // _GUARD_TYPE_VERSION
             owner = stack_pointer[-1];
-            uint32_t type_version = read_u32(&next_instr[1].cache);
-            PyObject *descr = read_obj(&next_instr[5].cache);
-            assert(oparg & 1);
-            PyTypeObject *owner_cls = Py_TYPE(owner);
-            DEOPT_IF(owner_cls->tp_version_tag != type_version, LOAD_ATTR);
-            Py_ssize_t dictoffset = owner_cls->tp_dictoffset;
-            assert(dictoffset > 0);
-            PyObject *dict = *(PyObject **)((char *)owner + dictoffset);
-            /* This object has a __dict__, just not yet created */
-            DEOPT_IF(dict != NULL, LOAD_ATTR);
-            STAT_INC(LOAD_ATTR, hit);
-            assert(descr != NULL);
-            assert(_PyType_HasFeature(Py_TYPE(descr), Py_TPFLAGS_METHOD_DESCRIPTOR));
-            attr = Py_NewRef(descr);
-            self = owner;
+            {
+                uint32_t type_version = read_u32(&next_instr[1].cache);
+                PyTypeObject *tp = Py_TYPE(owner);
+                assert(type_version != 0);
+                DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR);
+            }
+            // _CHECK_ATTR_METHOD_LAZY_DICT
+            {
+                Py_ssize_t dictoffset = Py_TYPE(owner)->tp_dictoffset;
+                assert(dictoffset > 0);
+                PyObject *dict = *(PyObject **)((char *)owner + dictoffset);
+                /* This object has a __dict__, just not yet created */
+                DEOPT_IF(dict != NULL, LOAD_ATTR);
+            }
+            // _LOAD_ATTR_METHOD_LAZY_DICT
+            {
+                PyObject *descr = read_obj(&next_instr[5].cache);
+                assert(oparg & 1);
+                STAT_INC(LOAD_ATTR, hit);
+                assert(descr != NULL);
+                assert(_PyType_HasFeature(Py_TYPE(descr), Py_TPFLAGS_METHOD_DESCRIPTOR));
+                attr = Py_NewRef(descr);
+                self = owner;
+            }
             STACK_GROW(1);
             stack_pointer[-2] = attr;
             stack_pointer[-1] = self;
@@ -4328,8 +4370,7 @@
                 total_args++;
             }
             DEOPT_IF(!PyCFunction_CheckExact(callable), CALL);
-            DEOPT_IF(PyCFunction_GET_FLAGS(callable) !=
-                (METH_FASTCALL | METH_KEYWORDS), CALL);
+            DEOPT_IF(PyCFunction_GET_FLAGS(callable) != (METH_FASTCALL | METH_KEYWORDS), CALL);
             STAT_INC(CALL, hit);
             /* res = func(self, args, nargs, kwnames) */
             _PyCFunctionFastWithKeywords cfunc =
diff --git a/Python/import.c b/Python/import.c
index 5636968ed9e63b..cafdd834502224 100644
--- a/Python/import.c
+++ b/Python/import.c
@@ -17,7 +17,7 @@
 #include "pycore_weakref.h"       // _PyWeakref_GET_REF()
 
 #include "marshal.h"              // PyMarshal_ReadObjectFromString()
-#include "importdl.h"             // _PyImport_DynLoadFiletab
+#include "pycore_importdl.h"      // _PyImport_DynLoadFiletab
 #include "pydtrace.h"             // PyDTrace_IMPORT_FIND_LOAD_START_ENABLED()
 #include <stdbool.h>              // bool
 
diff --git a/Python/importdl.c b/Python/importdl.c
index 9ab0a5ad33aaac..7dfd301d77efb4 100644
--- a/Python/importdl.c
+++ b/Python/importdl.c
@@ -15,7 +15,7 @@
 */
 #ifdef HAVE_DYNAMIC_LOADING
 
-#include "importdl.h"
+#include "pycore_importdl.h"
 
 #ifdef MS_WINDOWS
 extern dl_funcptr _PyImport_FindSharedFuncptrWindows(const char *prefix,
diff --git a/Python/importdl.h b/Python/importdl.h
deleted file mode 100644
index 9171adc2770689..00000000000000
--- a/Python/importdl.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef Py_IMPORTDL_H
-#define Py_IMPORTDL_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-extern const char *_PyImport_DynLoadFiletab[];
-
-extern PyObject *_PyImport_LoadDynamicModuleWithSpec(PyObject *spec, FILE *);
-
-typedef PyObject *(*PyModInitFunction)(void);
-
-/* Max length of module suffix searched for -- accommodates "module.slb" */
-#define MAXSUFFIXSIZE 12
-
-#ifdef MS_WINDOWS
-#include <windows.h>
-typedef FARPROC dl_funcptr;
-#else
-typedef void (*dl_funcptr)(void);
-#endif
-
-
-#ifdef __cplusplus
-}
-#endif
-#endif /* !Py_IMPORTDL_H */
diff --git a/Python/initconfig.c b/Python/initconfig.c
index a0467f51d4834e..6b76b4dc681b74 100644
--- a/Python/initconfig.c
+++ b/Python/initconfig.c
@@ -24,6 +24,105 @@
 #  endif
 #endif
 
+/* --- PyConfig spec ---------------------------------------------- */
+
+typedef enum {  // How the generic PyConfig loops treat one member
+    PyConfig_MEMBER_INT = 0,         // int; _PyConfig_FromDict() accepts any value
+    PyConfig_MEMBER_UINT = 1,        // stored as int; _PyConfig_FromDict() rejects negatives
+    PyConfig_MEMBER_ULONG = 2,       // unsigned long
+
+    PyConfig_MEMBER_WSTR = 10,       // wchar_t*; _PyConfig_FromDict() rejects NULL
+    PyConfig_MEMBER_WSTR_OPT = 11,   // wchar_t*; may be NULL (exported as None)
+    PyConfig_MEMBER_WSTR_LIST = 12,  // PyWideStringList
+} PyConfigMemberType;
+
+typedef struct {  // Describes one PyConfig member for the table-driven loops
+    const char *name;         // member name; also the key used in the config dict
+    size_t offset;            // byte offset of the member inside PyConfig (offsetof)
+    PyConfigMemberType type;  // selects how the bytes at `offset` are interpreted
+} PyConfigSpec;
+
+#define SPEC(MEMBER, TYPE) \
+    {#MEMBER, offsetof(PyConfig, MEMBER), PyConfig_MEMBER_##TYPE}
+
+static const PyConfigSpec PYCONFIG_SPEC[] = {  // iterated by _PyConfig_Copy(), _PyConfig_AsDict() and _PyConfig_FromDict()
+    SPEC(_config_init, UINT),  // _PyConfig_FromDict() also checks it is one of the _PyConfig_INIT_* values
+    SPEC(isolated, UINT),
+    SPEC(use_environment, UINT),
+    SPEC(dev_mode, UINT),
+    SPEC(install_signal_handlers, UINT),
+    SPEC(use_hash_seed, UINT),
+    SPEC(hash_seed, ULONG),  // _PyConfig_FromDict() also rejects values above MAX_HASH_SEED
+    SPEC(faulthandler, UINT),
+    SPEC(tracemalloc, UINT),
+    SPEC(perf_profiling, UINT),
+    SPEC(import_time, UINT),
+    SPEC(code_debug_ranges, UINT),
+    SPEC(show_ref_count, UINT),
+    SPEC(dump_refs, UINT),
+    SPEC(dump_refs_file, WSTR_OPT),
+    SPEC(malloc_stats, UINT),
+    SPEC(filesystem_encoding, WSTR),
+    SPEC(filesystem_errors, WSTR),
+    SPEC(pycache_prefix, WSTR_OPT),
+    SPEC(parse_argv, UINT),
+    SPEC(orig_argv, WSTR_LIST),
+    SPEC(argv, WSTR_LIST),
+    SPEC(xoptions, WSTR_LIST),
+    SPEC(warnoptions, WSTR_LIST),
+    SPEC(site_import, UINT),
+    SPEC(bytes_warning, UINT),
+    SPEC(warn_default_encoding, UINT),
+    SPEC(inspect, UINT),
+    SPEC(interactive, UINT),
+    SPEC(optimization_level, UINT),
+    SPEC(parser_debug, UINT),
+    SPEC(write_bytecode, UINT),
+    SPEC(verbose, UINT),
+    SPEC(quiet, UINT),
+    SPEC(user_site_directory, UINT),
+    SPEC(configure_c_stdio, UINT),
+    SPEC(buffered_stdio, UINT),
+    SPEC(stdio_encoding, WSTR),
+    SPEC(stdio_errors, WSTR),
+#ifdef MS_WINDOWS
+    SPEC(legacy_windows_stdio, UINT),
+#endif
+    SPEC(check_hash_pycs_mode, WSTR),
+    SPEC(use_frozen_modules, UINT),
+    SPEC(safe_path, UINT),
+    SPEC(int_max_str_digits, INT),  // the only INT entry: no negative-value check on import
+    SPEC(pathconfig_warnings, UINT),
+    SPEC(program_name, WSTR),
+    SPEC(pythonpath_env, WSTR_OPT),
+    SPEC(home, WSTR_OPT),
+    SPEC(platlibdir, WSTR),
+    SPEC(sys_path_0, WSTR_OPT),
+    SPEC(module_search_paths_set, UINT),
+    SPEC(module_search_paths, WSTR_LIST),
+    SPEC(stdlib_dir, WSTR_OPT),
+    SPEC(executable, WSTR_OPT),
+    SPEC(base_executable, WSTR_OPT),
+    SPEC(prefix, WSTR_OPT),
+    SPEC(base_prefix, WSTR_OPT),
+    SPEC(exec_prefix, WSTR_OPT),
+    SPEC(base_exec_prefix, WSTR_OPT),
+    SPEC(skip_source_first_line, UINT),
+    SPEC(run_command, WSTR_OPT),
+    SPEC(run_module, WSTR_OPT),
+    SPEC(run_filename, WSTR_OPT),
+    SPEC(_install_importlib, UINT),
+    SPEC(_init_main, UINT),
+    SPEC(_is_python_build, UINT),
+#ifdef Py_STATS
+    SPEC(_pystats, UINT),
+#endif
+    {NULL, 0, 0},  // sentinel: the loops stop at the first entry whose name is NULL
+};
+
+#undef SPEC
+
+
 /* --- Command line options --------------------------------------- */
 
 /* Short usage message (with %s for argv0) */
@@ -672,6 +771,7 @@ PyConfig_Clear(PyConfig *config)
     CLEAR(config->exec_prefix);
     CLEAR(config->base_exec_prefix);
     CLEAR(config->platlibdir);
+    CLEAR(config->sys_path_0);
 
     CLEAR(config->filesystem_encoding);
     CLEAR(config->filesystem_errors);
@@ -869,103 +969,52 @@ PyConfig_SetBytesString(PyConfig *config, wchar_t **config_str,
 PyStatus
 _PyConfig_Copy(PyConfig *config, const PyConfig *config2)
 {
-    PyStatus status;
-
     PyConfig_Clear(config);
 
-#define COPY_ATTR(ATTR) config->ATTR = config2->ATTR
-#define COPY_WSTR_ATTR(ATTR) \
-    do { \
-        status = PyConfig_SetString(config, &config->ATTR, config2->ATTR); \
-        if (_PyStatus_EXCEPTION(status)) { \
-            return status; \
-        } \
-    } while (0)
-#define COPY_WSTRLIST(LIST) \
-    do { \
-        if (_PyWideStringList_Copy(&config->LIST, &config2->LIST) < 0) { \
-            return _PyStatus_NO_MEMORY(); \
-        } \
-    } while (0)
-
-    COPY_ATTR(_config_init);
-    COPY_ATTR(isolated);
-    COPY_ATTR(use_environment);
-    COPY_ATTR(dev_mode);
-    COPY_ATTR(install_signal_handlers);
-    COPY_ATTR(use_hash_seed);
-    COPY_ATTR(hash_seed);
-    COPY_ATTR(_install_importlib);
-    COPY_ATTR(faulthandler);
-    COPY_ATTR(tracemalloc);
-    COPY_ATTR(perf_profiling);
-    COPY_ATTR(import_time);
-    COPY_ATTR(code_debug_ranges);
-    COPY_ATTR(show_ref_count);
-    COPY_ATTR(dump_refs);
-    COPY_ATTR(dump_refs_file);
-    COPY_ATTR(malloc_stats);
-
-    COPY_WSTR_ATTR(pycache_prefix);
-    COPY_WSTR_ATTR(pythonpath_env);
-    COPY_WSTR_ATTR(home);
-    COPY_WSTR_ATTR(program_name);
-
-    COPY_ATTR(parse_argv);
-    COPY_WSTRLIST(argv);
-    COPY_WSTRLIST(warnoptions);
-    COPY_WSTRLIST(xoptions);
-    COPY_WSTRLIST(module_search_paths);
-    COPY_ATTR(module_search_paths_set);
-    COPY_WSTR_ATTR(stdlib_dir);
-
-    COPY_WSTR_ATTR(executable);
-    COPY_WSTR_ATTR(base_executable);
-    COPY_WSTR_ATTR(prefix);
-    COPY_WSTR_ATTR(base_prefix);
-    COPY_WSTR_ATTR(exec_prefix);
-    COPY_WSTR_ATTR(base_exec_prefix);
-    COPY_WSTR_ATTR(platlibdir);
-
-    COPY_ATTR(site_import);
-    COPY_ATTR(bytes_warning);
-    COPY_ATTR(warn_default_encoding);
-    COPY_ATTR(inspect);
-    COPY_ATTR(interactive);
-    COPY_ATTR(optimization_level);
-    COPY_ATTR(parser_debug);
-    COPY_ATTR(write_bytecode);
-    COPY_ATTR(verbose);
-    COPY_ATTR(quiet);
-    COPY_ATTR(user_site_directory);
-    COPY_ATTR(configure_c_stdio);
-    COPY_ATTR(buffered_stdio);
-    COPY_WSTR_ATTR(filesystem_encoding);
-    COPY_WSTR_ATTR(filesystem_errors);
-    COPY_WSTR_ATTR(stdio_encoding);
-    COPY_WSTR_ATTR(stdio_errors);
-#ifdef MS_WINDOWS
-    COPY_ATTR(legacy_windows_stdio);
-#endif
-    COPY_ATTR(skip_source_first_line);
-    COPY_WSTR_ATTR(run_command);
-    COPY_WSTR_ATTR(run_module);
-    COPY_WSTR_ATTR(run_filename);
-    COPY_WSTR_ATTR(check_hash_pycs_mode);
-    COPY_ATTR(pathconfig_warnings);
-    COPY_ATTR(_init_main);
-    COPY_ATTR(use_frozen_modules);
-    COPY_ATTR(safe_path);
-    COPY_WSTRLIST(orig_argv);
-    COPY_ATTR(_is_python_build);
-    COPY_ATTR(int_max_str_digits);
-#ifdef Py_STATS
-    COPY_ATTR(_pystats);
-#endif
-
-#undef COPY_ATTR
-#undef COPY_WSTR_ATTR
-#undef COPY_WSTRLIST
+    // Copy each member listed in PYCONFIG_SPEC from config2 into config.
+    // Integers are assigned directly; strings go through PyConfig_SetString()
+    // and string lists through _PyWideStringList_Copy().  The first failing
+    // copy is returned as an error status.
+    PyStatus status;
+    const PyConfigSpec *spec = PYCONFIG_SPEC;
+    for (; spec->name != NULL; spec++) {
+        char *member = (char *)config + spec->offset;
+        // config2 is read-only: keep the const qualifier on every access.
+        const char *member2 = (const char *)config2 + spec->offset;
+        switch (spec->type) {
+        case PyConfig_MEMBER_INT:
+        case PyConfig_MEMBER_UINT:
+        {
+            *(int*)member = *(const int*)member2;
+            break;
+        }
+        case PyConfig_MEMBER_ULONG:
+        {
+            *(unsigned long*)member = *(const unsigned long*)member2;
+            break;
+        }
+        case PyConfig_MEMBER_WSTR:
+        case PyConfig_MEMBER_WSTR_OPT:
+        {
+            const wchar_t *str = *(const wchar_t *const *)member2;
+            status = PyConfig_SetString(config, (wchar_t**)member, str);
+            if (_PyStatus_EXCEPTION(status)) {
+                return status;
+            }
+            break;
+        }
+        case PyConfig_MEMBER_WSTR_LIST:
+        {
+            if (_PyWideStringList_Copy((PyWideStringList*)member,
+                                       (const PyWideStringList*)member2) < 0) {
+                return _PyStatus_NO_MEMORY();
+            }
+            break;
+        }
+        default:
+            Py_UNREACHABLE();
+        }
+    }
     return _PyStatus_OK();
 }
 
@@ -978,113 +1022,58 @@ _PyConfig_AsDict(const PyConfig *config)
         return NULL;
     }
 
-#define SET_ITEM(KEY, EXPR) \
-        do { \
-            PyObject *obj = (EXPR); \
-            if (obj == NULL) { \
-                goto fail; \
-            } \
-            int res = PyDict_SetItemString(dict, (KEY), obj); \
-            Py_DECREF(obj); \
-            if (res < 0) { \
-                goto fail; \
-            } \
-        } while (0)
-#define SET_ITEM_INT(ATTR) \
-    SET_ITEM(#ATTR, PyLong_FromLong(config->ATTR))
-#define SET_ITEM_UINT(ATTR) \
-    SET_ITEM(#ATTR, PyLong_FromUnsignedLong(config->ATTR))
-#define FROM_WSTRING(STR) \
-    ((STR != NULL) ? \
-        PyUnicode_FromWideChar(STR, -1) \
-        : Py_NewRef(Py_None))
-#define SET_ITEM_WSTR(ATTR) \
-    SET_ITEM(#ATTR, FROM_WSTRING(config->ATTR))
-#define SET_ITEM_WSTRLIST(LIST) \
-    SET_ITEM(#LIST, _PyWideStringList_AsList(&config->LIST))
-
-    SET_ITEM_INT(_config_init);
-    SET_ITEM_INT(isolated);
-    SET_ITEM_INT(use_environment);
-    SET_ITEM_INT(dev_mode);
-    SET_ITEM_INT(install_signal_handlers);
-    SET_ITEM_INT(use_hash_seed);
-    SET_ITEM_UINT(hash_seed);
-    SET_ITEM_INT(faulthandler);
-    SET_ITEM_INT(tracemalloc);
-    SET_ITEM_INT(perf_profiling);
-    SET_ITEM_INT(import_time);
-    SET_ITEM_INT(code_debug_ranges);
-    SET_ITEM_INT(show_ref_count);
-    SET_ITEM_INT(dump_refs);
-    SET_ITEM_INT(malloc_stats);
-    SET_ITEM_WSTR(filesystem_encoding);
-    SET_ITEM_WSTR(filesystem_errors);
-    SET_ITEM_WSTR(pycache_prefix);
-    SET_ITEM_WSTR(program_name);
-    SET_ITEM_INT(parse_argv);
-    SET_ITEM_WSTRLIST(argv);
-    SET_ITEM_WSTRLIST(xoptions);
-    SET_ITEM_WSTRLIST(warnoptions);
-    SET_ITEM_WSTR(pythonpath_env);
-    SET_ITEM_WSTR(home);
-    SET_ITEM_INT(module_search_paths_set);
-    SET_ITEM_WSTRLIST(module_search_paths);
-    SET_ITEM_WSTR(stdlib_dir);
-    SET_ITEM_WSTR(executable);
-    SET_ITEM_WSTR(base_executable);
-    SET_ITEM_WSTR(prefix);
-    SET_ITEM_WSTR(base_prefix);
-    SET_ITEM_WSTR(exec_prefix);
-    SET_ITEM_WSTR(base_exec_prefix);
-    SET_ITEM_WSTR(platlibdir);
-    SET_ITEM_INT(site_import);
-    SET_ITEM_INT(bytes_warning);
-    SET_ITEM_INT(warn_default_encoding);
-    SET_ITEM_INT(inspect);
-    SET_ITEM_INT(interactive);
-    SET_ITEM_INT(optimization_level);
-    SET_ITEM_INT(parser_debug);
-    SET_ITEM_INT(write_bytecode);
-    SET_ITEM_INT(verbose);
-    SET_ITEM_INT(quiet);
-    SET_ITEM_INT(user_site_directory);
-    SET_ITEM_INT(configure_c_stdio);
-    SET_ITEM_INT(buffered_stdio);
-    SET_ITEM_WSTR(stdio_encoding);
-    SET_ITEM_WSTR(stdio_errors);
-#ifdef MS_WINDOWS
-    SET_ITEM_INT(legacy_windows_stdio);
-#endif
-    SET_ITEM_INT(skip_source_first_line);
-    SET_ITEM_WSTR(run_command);
-    SET_ITEM_WSTR(run_module);
-    SET_ITEM_WSTR(run_filename);
-    SET_ITEM_INT(_install_importlib);
-    SET_ITEM_WSTR(check_hash_pycs_mode);
-    SET_ITEM_INT(pathconfig_warnings);
-    SET_ITEM_INT(_init_main);
-    SET_ITEM_WSTRLIST(orig_argv);
-    SET_ITEM_INT(use_frozen_modules);
-    SET_ITEM_INT(safe_path);
-    SET_ITEM_INT(_is_python_build);
-    SET_ITEM_INT(int_max_str_digits);
-#ifdef Py_STATS
-    SET_ITEM_INT(_pystats);
-#endif
+    const PyConfigSpec *spec = PYCONFIG_SPEC;
+    for (; spec->name != NULL; spec++) {
+        char *member = (char *)config + spec->offset;
+        PyObject *obj;
+        switch (spec->type) {
+        case PyConfig_MEMBER_INT:
+        case PyConfig_MEMBER_UINT:
+        {
+            int value = *(int*)member;
+            obj = PyLong_FromLong(value);
+            break;
+        }
+        case PyConfig_MEMBER_ULONG:
+        {
+            unsigned long value = *(unsigned long*)member;
+            obj = PyLong_FromUnsignedLong(value);
+            break;
+        }
+        case PyConfig_MEMBER_WSTR:
+        case PyConfig_MEMBER_WSTR_OPT:
+        {
+            const wchar_t *wstr = *(const wchar_t**)member;
+            if (wstr != NULL) {
+                obj = PyUnicode_FromWideChar(wstr, -1);
+            }
+            else {
+                obj = Py_NewRef(Py_None);
+            }
+            break;
+        }
+        case PyConfig_MEMBER_WSTR_LIST:
+        {
+            const PyWideStringList *list = (const PyWideStringList*)member;
+            obj = _PyWideStringList_AsList(list);
+            break;
+        }
+        default:
+            Py_UNREACHABLE();
+        }
 
+        if (obj == NULL) {
+            Py_DECREF(dict);
+            return NULL;
+        }
+        int res = PyDict_SetItemString(dict, spec->name, obj);
+        Py_DECREF(obj);
+        if (res < 0) {
+            Py_DECREF(dict);
+            return NULL;
+        }
+    }
     return dict;
-
-fail:
-    Py_DECREF(dict);
-    return NULL;
-
-#undef FROM_WSTRING
-#undef SET_ITEM
-#undef SET_ITEM_INT
-#undef SET_ITEM_UINT
-#undef SET_ITEM_WSTR
-#undef SET_ITEM_WSTRLIST
 }
 
 
@@ -1263,131 +1252,81 @@ _PyConfig_FromDict(PyConfig *config, PyObject *dict)
         return -1;
     }
 
-#define CHECK_VALUE(NAME, TEST) \
-    if (!(TEST)) { \
-        config_dict_invalid_value(NAME); \
-        return -1; \
+    const PyConfigSpec *spec = PYCONFIG_SPEC;
+    for (; spec->name != NULL; spec++) {
+        char *member = (char *)config + spec->offset;
+        switch (spec->type) {
+        case PyConfig_MEMBER_INT:
+            if (config_dict_get_int(dict, spec->name, (int*)member) < 0) {
+                return -1;
+            }
+            break;
+        case PyConfig_MEMBER_UINT:
+        {
+            int value;
+            if (config_dict_get_int(dict, spec->name, &value) < 0) {
+                return -1;
+            }
+            if (value < 0) {
+                config_dict_invalid_value(spec->name);
+                return -1;
+            }
+            *(int*)member = value;
+            break;
+        }
+        case PyConfig_MEMBER_ULONG:
+        {
+            if (config_dict_get_ulong(dict, spec->name,
+                                      (unsigned long*)member) < 0) {
+                return -1;
+            }
+            break;
+        }
+        case PyConfig_MEMBER_WSTR:
+        {
+            wchar_t **wstr = (wchar_t**)member;
+            if (config_dict_get_wstr(dict, spec->name, config, wstr) < 0) {
+                return -1;
+            }
+            if (*wstr == NULL) {
+                config_dict_invalid_value(spec->name);
+                return -1;
+            }
+            break;
+        }
+        case PyConfig_MEMBER_WSTR_OPT:
+        {
+            wchar_t **wstr = (wchar_t**)member;
+            if (config_dict_get_wstr(dict, spec->name, config, wstr) < 0) {
+                return -1;
+            }
+            break;
+        }
+        case PyConfig_MEMBER_WSTR_LIST:
+        {
+            if (config_dict_get_wstrlist(dict, spec->name, config,
+                                         (PyWideStringList*)member) < 0) {
+                return -1;
+            }
+            break;
+        }
+        default:
+            Py_UNREACHABLE();
+        }
     }
-#define GET_UINT(KEY) \
-    do { \
-        if (config_dict_get_int(dict, #KEY, &config->KEY) < 0) { \
-            return -1; \
-        } \
-        CHECK_VALUE(#KEY, config->KEY >= 0); \
-    } while (0)
-#define GET_INT(KEY) \
-    do { \
-        if (config_dict_get_int(dict, #KEY, &config->KEY) < 0) { \
-            return -1; \
-        } \
-    } while (0)
-#define GET_WSTR(KEY) \
-    do { \
-        if (config_dict_get_wstr(dict, #KEY, config, &config->KEY) < 0) { \
-            return -1; \
-        } \
-        CHECK_VALUE(#KEY, config->KEY != NULL); \
-    } while (0)
-#define GET_WSTR_OPT(KEY) \
-    do { \
-        if (config_dict_get_wstr(dict, #KEY, config, &config->KEY) < 0) { \
-            return -1; \
-        } \
-    } while (0)
-#define GET_WSTRLIST(KEY) \
-    do { \
-        if (config_dict_get_wstrlist(dict, #KEY, config, &config->KEY) < 0) { \
-            return -1; \
-        } \
-    } while (0)
 
-    GET_UINT(_config_init);
-    CHECK_VALUE("_config_init",
-                config->_config_init == _PyConfig_INIT_COMPAT
-                || config->_config_init == _PyConfig_INIT_PYTHON
-                || config->_config_init == _PyConfig_INIT_ISOLATED);
-    GET_UINT(isolated);
-    GET_UINT(use_environment);
-    GET_UINT(dev_mode);
-    GET_UINT(install_signal_handlers);
-    GET_UINT(use_hash_seed);
-    if (config_dict_get_ulong(dict, "hash_seed", &config->hash_seed) < 0) {
+    if (!(config->_config_init == _PyConfig_INIT_COMPAT
+          || config->_config_init == _PyConfig_INIT_PYTHON
+          || config->_config_init == _PyConfig_INIT_ISOLATED))
+    {
+        config_dict_invalid_value("_config_init");
         return -1;
     }
-    CHECK_VALUE("hash_seed", config->hash_seed <= MAX_HASH_SEED);
-    GET_UINT(faulthandler);
-    GET_UINT(tracemalloc);
-    GET_UINT(perf_profiling);
-    GET_UINT(import_time);
-    GET_UINT(code_debug_ranges);
-    GET_UINT(show_ref_count);
-    GET_UINT(dump_refs);
-    GET_UINT(malloc_stats);
-    GET_WSTR(filesystem_encoding);
-    GET_WSTR(filesystem_errors);
-    GET_WSTR_OPT(pycache_prefix);
-    GET_UINT(parse_argv);
-    GET_WSTRLIST(orig_argv);
-    GET_WSTRLIST(argv);
-    GET_WSTRLIST(xoptions);
-    GET_WSTRLIST(warnoptions);
-    GET_UINT(site_import);
-    GET_UINT(bytes_warning);
-    GET_UINT(warn_default_encoding);
-    GET_UINT(inspect);
-    GET_UINT(interactive);
-    GET_UINT(optimization_level);
-    GET_UINT(parser_debug);
-    GET_UINT(write_bytecode);
-    GET_UINT(verbose);
-    GET_UINT(quiet);
-    GET_UINT(user_site_directory);
-    GET_UINT(configure_c_stdio);
-    GET_UINT(buffered_stdio);
-    GET_WSTR(stdio_encoding);
-    GET_WSTR(stdio_errors);
-#ifdef MS_WINDOWS
-    GET_UINT(legacy_windows_stdio);
-#endif
-    GET_WSTR(check_hash_pycs_mode);
-
-    GET_UINT(pathconfig_warnings);
-    GET_WSTR(program_name);
-    GET_WSTR_OPT(pythonpath_env);
-    GET_WSTR_OPT(home);
-    GET_WSTR(platlibdir);
-
-    // Path configuration output
-    GET_UINT(module_search_paths_set);
-    GET_WSTRLIST(module_search_paths);
-    GET_WSTR_OPT(stdlib_dir);
-    GET_WSTR_OPT(executable);
-    GET_WSTR_OPT(base_executable);
-    GET_WSTR_OPT(prefix);
-    GET_WSTR_OPT(base_prefix);
-    GET_WSTR_OPT(exec_prefix);
-    GET_WSTR_OPT(base_exec_prefix);
-
-    GET_UINT(skip_source_first_line);
-    GET_WSTR_OPT(run_command);
-    GET_WSTR_OPT(run_module);
-    GET_WSTR_OPT(run_filename);
-
-    GET_UINT(_install_importlib);
-    GET_UINT(_init_main);
-    GET_UINT(use_frozen_modules);
-    GET_UINT(safe_path);
-    GET_UINT(_is_python_build);
-    GET_INT(int_max_str_digits);
-#ifdef Py_STATS
-    GET_UINT(_pystats);
-#endif
 
-#undef CHECK_VALUE
-#undef GET_UINT
-#undef GET_INT
-#undef GET_WSTR
-#undef GET_WSTR_OPT
+    if (config->hash_seed > MAX_HASH_SEED) {
+        config_dict_invalid_value("hash_seed");
+        return -1;
+    }
     return 0;
 }
 
@@ -3114,6 +3053,7 @@ _Py_DumpPathConfig(PyThreadState *tstate)
     PySys_WriteStderr("  import site = %i\n", config->site_import);
     PySys_WriteStderr("  is in build tree = %i\n", config->_is_python_build);
     DUMP_CONFIG("stdlib dir", stdlib_dir);
+    DUMP_CONFIG("sys.path[0]", sys_path_0);
 #undef DUMP_CONFIG
 
 #define DUMP_SYS(NAME) \
diff --git a/Python/instrumentation.c b/Python/instrumentation.c
index 0b974f6133ce7d..eee1908e503e43 100644
--- a/Python/instrumentation.c
+++ b/Python/instrumentation.c
@@ -4,6 +4,7 @@
 
 #include "pycore_bitutils.h"      // _Py_popcount32
 #include "pycore_call.h"
+#include "pycore_ceval.h"         // _PY_EVAL_EVENTS_BITS
 #include "pycore_code.h"          // _PyCode_Clear_Executors()
 #include "pycore_frame.h"
 #include "pycore_interp.h"
@@ -895,10 +896,27 @@ static inline int most_significant_bit(uint8_t bits) {
     return MOST_SIGNIFICANT_BITS[bits];
 }
 
+static uint32_t
+global_version(PyInterpreterState *interp)
+{  // Monitoring version stored in eval_breaker, i.e. everything outside the event-flag bits.
+    return interp->ceval.eval_breaker & ~_PY_EVAL_EVENTS_MASK;  // plain read; result is narrowed to uint32_t
+}
+
+static void
+set_global_version(PyInterpreterState *interp, uint32_t version)
+{  // Replace the version bits of eval_breaker with `version`, leaving the event-flag bits as they are.
+    assert((version & _PY_EVAL_EVENTS_MASK) == 0);  // version must not overlap the event-flag bits
+    uintptr_t old = _Py_atomic_load_uintptr(&interp->ceval.eval_breaker);
+    uintptr_t new;  // unsigned, same type as eval_breaker and the compare-exchange operands
+    do {  // retry until the exchange succeeds against the value the new word was built from
+        new = (old & _PY_EVAL_EVENTS_MASK) | version;  // keep current flag bits, install new version bits
+    } while (!_Py_atomic_compare_exchange_uintptr(&interp->ceval.eval_breaker, &old, new));
+}
+
 static bool
 is_version_up_to_date(PyCodeObject *code, PyInterpreterState *interp)
 {
-    return interp->monitoring_version == code->_co_instrumentation_version;
+    return global_version(interp) == code->_co_instrumentation_version;
 }
 
 #ifndef NDEBUG
@@ -1556,7 +1574,7 @@ _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp)
 {
     if (is_version_up_to_date(code, interp)) {
         assert(
-            interp->monitoring_version == 0 ||
+            (interp->ceval.eval_breaker & ~_PY_EVAL_EVENTS_MASK) == 0 ||
             instrumentation_cross_checks(interp, code)
         );
         return 0;
@@ -1594,7 +1612,7 @@ _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp)
         assert(monitors_are_empty(monitors_and(new_events, removed_events)));
     }
     code->_co_monitoring->active_monitors = active_events;
-    code->_co_instrumentation_version = interp->monitoring_version;
+    code->_co_instrumentation_version = global_version(interp);
     if (monitors_are_empty(new_events) && monitors_are_empty(removed_events)) {
 #ifdef INSTRUMENT_DEBUG
         sanity_check_instrumentation(code);
@@ -1761,6 +1779,10 @@ check_tool(PyInterpreterState *interp, int tool_id)
     return 0;
 }
 
+/* We share the eval-breaker with flags, so the monitoring
+ * version goes in the top 24 bits */
+#define MONITORING_VERSION_INCREMENT (1 << _PY_EVAL_EVENTS_BITS)
+
 int
 _PyMonitoring_SetEvents(int tool_id, _PyMonitoringEventSet events)
 {
@@ -1775,7 +1797,12 @@ _PyMonitoring_SetEvents(int tool_id, _PyMonitoringEventSet events)
         return 0;
     }
     set_events(&interp->monitors, tool_id, events);
-    interp->monitoring_version++;
+    uint32_t new_version = global_version(interp) + MONITORING_VERSION_INCREMENT;
+    if (new_version == 0) {
+        PyErr_Format(PyExc_OverflowError, "events set too many times");
+        return -1;
+    }
+    set_global_version(interp, new_version);
     return instrument_all_executing_code_objects(interp);
 }
 
@@ -1803,7 +1830,7 @@ _PyMonitoring_SetLocalEvents(PyCodeObject *code, int tool_id, _PyMonitoringEvent
     set_local_events(local, tool_id, events);
     if (is_version_up_to_date(code, interp)) {
         /* Force instrumentation update */
-        code->_co_instrumentation_version = UINT64_MAX;
+        code->_co_instrumentation_version -= MONITORING_VERSION_INCREMENT;
     }
     if (_Py_Instrument(code, interp)) {
         return -1;
@@ -2086,8 +2113,14 @@ monitoring_restart_events_impl(PyObject *module)
      * last restart version < current version
      */
     PyInterpreterState *interp = _PyInterpreterState_GET();
-    interp->last_restart_version = interp->monitoring_version + 1;
-    interp->monitoring_version = interp->last_restart_version + 1;
+    uint32_t restart_version = global_version(interp) + MONITORING_VERSION_INCREMENT;
+    uint32_t new_version = restart_version + MONITORING_VERSION_INCREMENT;
+    if (new_version <= MONITORING_VERSION_INCREMENT) {
+        PyErr_Format(PyExc_OverflowError, "events set too many times");
+        return NULL;
+    }
+    interp->last_restart_version = restart_version;
+    set_global_version(interp, new_version);
     if (instrument_all_executing_code_objects(interp)) {
         return NULL;
     }
diff --git a/Python/optimizer.c b/Python/optimizer.c
index fbdbf7291784c4..65b9638be25e98 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -461,6 +461,7 @@ translate_bytecode_to_trace(
     if (trace_length + (n) > max_length) { \
         DPRINTF(2, "No room for %s (need %d, got %d)\n", \
                 (opname), (n), max_length - trace_length); \
+        OPT_STAT_INC(trace_too_long); \
         goto done; \
     } \
     reserved = (n);  // Keep ADD_TO_TRACE / ADD_TO_STUB honest
@@ -472,6 +473,7 @@ translate_bytecode_to_trace(
 #define TRACE_STACK_PUSH() \
     if (trace_stack_depth >= TRACE_STACK_SIZE) { \
         DPRINTF(2, "Trace stack overflow\n"); \
+        OPT_STAT_INC(trace_stack_overflow); \
         ADD_TO_TRACE(_SET_IP, 0, 0); \
         goto done; \
     } \
@@ -572,6 +574,7 @@ translate_bytecode_to_trace(
                     ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0);
                 }
                 else {
+                    OPT_STAT_INC(inner_loop);
                     DPRINTF(2, "JUMP_BACKWARD not to top ends trace\n");
                 }
                 goto done;
@@ -638,7 +641,9 @@ translate_bytecode_to_trace(
                         // LOAD_CONST + _POP_FRAME.
                         if (trace_stack_depth == 0) {
                             DPRINTF(2, "Trace stack underflow\n");
-                            goto done;}
+                            OPT_STAT_INC(trace_stack_underflow);
+                            goto done;
+                        }
                     }
                     uint32_t orig_oparg = oparg;  // For OPARG_TOP/BOTTOM
                     for (int i = 0; i < nuops; i++) {
@@ -713,6 +718,7 @@ translate_bytecode_to_trace(
                                             PyUnicode_AsUTF8(new_code->co_qualname),
                                             PyUnicode_AsUTF8(new_code->co_filename),
                                             new_code->co_firstlineno);
+                                    OPT_STAT_INC(recursive_call);
                                     ADD_TO_TRACE(_SET_IP, 0, 0);
                                     goto done;
                                 }
@@ -744,6 +750,7 @@ translate_bytecode_to_trace(
                     break;
                 }
                 DPRINTF(2, "Unsupported opcode %s\n", uop_name(opcode));
+                OPT_UNSUPPORTED_OPCODE(opcode);
                 goto done;  // Break out of loop
             }  // End default
 
@@ -791,6 +798,7 @@ translate_bytecode_to_trace(
         return trace_length;
     }
     else {
+        OPT_STAT_INC(trace_too_short);
         DPRINTF(4,
                 "No trace for %s (%s:%d) at byte offset %d\n",
                 PyUnicode_AsUTF8(code->co_qualname),
@@ -891,7 +899,8 @@ uop_optimize(
         // Error or nothing translated
         return trace_length;
     }
-    OBJECT_STAT_INC(optimization_traces_created);
+    OPT_HIST(trace_length, trace_length_hist);
+    OPT_STAT_INC(traces_created);
     char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE");
     if (uop_optimize != NULL && *uop_optimize > '0') {
         trace_length = _Py_uop_analyze_and_optimize(code, trace, trace_length, curr_stackentries);
@@ -901,6 +910,7 @@ uop_optimize(
     if (executor == NULL) {
         return -1;
     }
+    OPT_HIST(trace_length, optimized_trace_length_hist);
     executor->base.execute = _PyUopExecute;
     memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction));
     *exec_ptr = (_PyExecutorObject *)executor;
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 23f66ec3601df6..14033162377489 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -37,6 +37,9 @@
 
 #include <locale.h>               // setlocale()
 #include <stdlib.h>               // getenv()
+#ifdef HAVE_UNISTD_H
+#  include <unistd.h>             // isatty()
+#endif
 
 #if defined(__APPLE__)
 #  include <mach-o/loader.h>
@@ -652,13 +655,12 @@ pycore_create_interpreter(_PyRuntimeState *runtime,
         return status;
     }
 
-    PyThreadState *tstate = _PyThreadState_New(interp);
+    PyThreadState *tstate = _PyThreadState_New(interp,
+                                               _PyThreadState_WHENCE_INTERP);
     if (tstate == NULL) {
         return _PyStatus_ERR("can't make first thread");
     }
     _PyThreadState_Bind(tstate);
-    // XXX For now we do this before the GIL is created.
-    (void) _PyThreadState_SwapNoGIL(tstate);
 
     status = init_interp_create_gil(tstate, config.gil);
     if (_PyStatus_EXCEPTION(status)) {
@@ -1206,6 +1208,31 @@ init_interp_main(PyThreadState *tstate)
         }
     }
 
+    if (!is_main_interp) {
+        // The main interpreter is handled in Py_Main(), for now.
+        if (config->sys_path_0 != NULL) {
+            PyObject *path0 = PyUnicode_FromWideChar(config->sys_path_0, -1);
+            if (path0 == NULL) {
+                return _PyStatus_ERR("can't initialize sys.path[0]");
+            }
+            PyObject *sysdict = interp->sysdict;
+            if (sysdict == NULL) {
+                Py_DECREF(path0);
+                return _PyStatus_ERR("can't initialize sys.path[0]");
+            }
+            PyObject *sys_path = PyDict_GetItemWithError(sysdict, &_Py_ID(path));
+            if (sys_path == NULL) {
+                Py_DECREF(path0);
+                return _PyStatus_ERR("can't initialize sys.path[0]");
+            }
+            int res = PyList_Insert(sys_path, 0, path0);
+            Py_DECREF(path0);
+            if (res) {
+                return _PyStatus_ERR("can't initialize sys.path[0]");
+            }
+        }
+    }
+
     assert(!_PyErr_Occurred(tstate));
 
     return _PyStatus_OK();
@@ -2022,7 +2049,8 @@ new_interpreter(PyThreadState **tstate_p, const PyInterpreterConfig *config)
         return _PyStatus_OK();
     }
 
-    PyThreadState *tstate = _PyThreadState_New(interp);
+    PyThreadState *tstate = _PyThreadState_New(interp,
+                                               _PyThreadState_WHENCE_INTERP);
     if (tstate == NULL) {
         PyInterpreterState_Delete(interp);
         *tstate_p = NULL;
@@ -2030,8 +2058,7 @@ new_interpreter(PyThreadState **tstate_p, const PyInterpreterConfig *config)
     }
     _PyThreadState_Bind(tstate);
 
-    // XXX For now we do this before the GIL is created.
-    PyThreadState *save_tstate = _PyThreadState_SwapNoGIL(tstate);
+    PyThreadState *save_tstate = _PyThreadState_GET();
     int has_gil = 0;
 
     /* From this point until the init_interp_create_gil() call,
@@ -2043,7 +2070,7 @@ new_interpreter(PyThreadState **tstate_p, const PyInterpreterConfig *config)
     const PyConfig *src_config;
     if (save_tstate != NULL) {
         // XXX Might new_interpreter() have been called without the GIL held?
-        _PyEval_ReleaseLock(save_tstate->interp, save_tstate);
+        _PyThreadState_Detach(save_tstate);
         src_config = _PyInterpreterState_GetConfig(save_tstate->interp);
     }
     else
@@ -2090,12 +2117,11 @@ new_interpreter(PyThreadState **tstate_p, const PyInterpreterConfig *config)
     *tstate_p = NULL;
 
     /* Oops, it didn't work.  Undo it all. */
-    PyErr_PrintEx(0);
     if (has_gil) {
-        PyThreadState_Swap(save_tstate);
+        _PyThreadState_Detach(tstate);
     }
-    else {
-        _PyThreadState_SwapNoGIL(save_tstate);
+    if (save_tstate != NULL) {
+        _PyThreadState_Attach(save_tstate);
     }
     PyThreadState_Clear(tstate);
     PyThreadState_Delete(tstate);
diff --git a/Python/pystate.c b/Python/pystate.c
index 01aa2552e56f0d..a024ae7e3806a6 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -495,6 +495,8 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime)
     return _PyStatus_OK();
 }
 
+static void _xidregistry_clear(struct _xidregistry *);
+
 void
 _PyRuntimeState_Fini(_PyRuntimeState *runtime)
 {
@@ -503,6 +505,8 @@ _PyRuntimeState_Fini(_PyRuntimeState *runtime)
     assert(runtime->object_state.interpreter_leaks == 0);
 #endif
 
+    _xidregistry_clear(&runtime->xidregistry);
+
     if (gilstate_tss_initialized(runtime)) {
         gilstate_tss_fini(runtime);
     }
@@ -548,6 +552,11 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime)
     for (int i = 0; i < NUMLOCKS; i++) {
         reinit_err += _PyThread_at_fork_reinit(lockptrs[i]);
     }
+    /* PyOS_AfterFork_Child(), which calls this function, later calls
+       _PyInterpreterState_DeleteExceptMain(), so we only need to update
+       the main interpreter here. */
+    assert(runtime->interpreters.main != NULL);
+    runtime->interpreters.main->xidregistry.mutex = runtime->xidregistry.mutex;
 
     PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
 
@@ -709,6 +718,10 @@ init_interpreter(PyInterpreterState *interp,
         interp->dtoa = (struct _dtoa_state)_dtoa_state_INIT(interp);
     }
     interp->f_opcode_trace_set = false;
+
+    assert(runtime->xidregistry.mutex != NULL);
+    interp->xidregistry.mutex = runtime->xidregistry.mutex;
+
     interp->_initialized = 1;
     return _PyStatus_OK();
 }
@@ -930,6 +943,10 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
     Py_CLEAR(interp->sysdict);
     Py_CLEAR(interp->builtins);
 
+    _xidregistry_clear(&interp->xidregistry);
+    /* The lock is owned by the runtime, so we don't free it here. */
+    interp->xidregistry.mutex = NULL;
+
     if (tstate->interp == interp) {
         /* We are now safe to fix tstate->_status.cleared. */
         // XXX Do this (much) earlier?
@@ -981,6 +998,7 @@ _PyInterpreterState_Clear(PyThreadState *tstate)
 
 
 static inline void tstate_deactivate(PyThreadState *tstate);
+static void tstate_set_detached(PyThreadState *tstate);
 static void zapthreads(PyInterpreterState *interp);
 
 void
@@ -994,9 +1012,7 @@ PyInterpreterState_Delete(PyInterpreterState *interp)
     PyThreadState *tcur = current_fast_get(runtime);
     if (tcur != NULL && interp == tcur->interp) {
         /* Unset current thread.  After this, many C API calls become crashy. */
-        current_fast_clear(runtime);
-        tstate_deactivate(tcur);
-        _PyEval_ReleaseLock(interp, NULL);
+        _PyThreadState_Detach(tcur);
     }
 
     zapthreads(interp);
@@ -1091,6 +1107,61 @@ _PyInterpreterState_DeleteExceptMain(_PyRuntimeState *runtime)
 #endif
 
 
+int
+_PyInterpreterState_SetRunningMain(PyInterpreterState *interp)
+{  // Record the current thread state as interp->threads.main; returns 0, or -1 with an exception set.
+    if (_PyInterpreterState_FailIfRunningMain(interp) < 0) {
+        return -1;  // a main thread is already recorded; the callee set RuntimeError
+    }
+    PyThreadState *tstate = current_fast_get(&_PyRuntime);
+    _Py_EnsureTstateNotNULL(tstate);
+    if (tstate->interp != interp) {  // the current thread state must belong to interp
+        PyErr_SetString(PyExc_RuntimeError,
+                        "current tstate has wrong interpreter");
+        return -1;
+    }
+    interp->threads.main = tstate;
+    return 0;
+}
+
+void
+_PyInterpreterState_SetNotRunningMain(PyInterpreterState *interp)
+{  // Clear interp->threads.main, first running the thread state's on_delete hook if one is set.
+    PyThreadState *tstate = interp->threads.main;
+    assert(tstate == current_fast_get(&_PyRuntime));  // must run on the thread recorded as main
+
+    if (tstate->on_delete != NULL) {
+        // The threading module was imported for the first time in this
+        // thread, so it was set as threading._main_thread.  (See gh-75698.)
+        // The thread has finished running the Python program so we mark
+        // the thread object as finished.
+        assert(tstate->_whence != _PyThreadState_WHENCE_THREADING);
+        tstate->on_delete(tstate->on_delete_data);
+        tstate->on_delete = NULL;  // reset so PyThreadState_Clear() does not invoke the hook again
+        tstate->on_delete_data = NULL;
+    }
+
+    interp->threads.main = NULL;
+}
+
+int
+_PyInterpreterState_IsRunningMain(PyInterpreterState *interp)
+{  // Query only: no error is set and no state is changed.
+    return (interp->threads.main != NULL);  // 1 while a thread state is recorded as main, else 0
+}
+
+int
+_PyInterpreterState_FailIfRunningMain(PyInterpreterState *interp)
+{  // Returns 0 when no main thread is recorded; otherwise sets RuntimeError and returns -1.
+    if (interp->threads.main != NULL) {
+        PyErr_SetString(PyExc_RuntimeError,
+                        "interpreter already running");
+        return -1;
+    }
+    return 0;
+}
+
+
 //----------
 // accessors
 //----------
@@ -1150,8 +1221,10 @@ _PyInterpreterState_IDDecref(PyInterpreterState *interp)
     PyThread_release_lock(interp->id_mutex);
 
     if (refcount == 0 && interp->requires_idref) {
-        // XXX Using the "head" thread isn't strictly correct.
-        PyThreadState *tstate = PyInterpreterState_ThreadHead(interp);
+        PyThreadState *tstate = _PyThreadState_New(interp,
+                                                   _PyThreadState_WHENCE_INTERP);
+        _PyThreadState_Bind(tstate);
+
         // XXX Possible GILState issues?
         PyThreadState *save_tstate = _PyThreadState_Swap(runtime, tstate);
         Py_EndInterpreter(tstate);
@@ -1305,7 +1378,14 @@ free_threadstate(PyThreadState *tstate)
 {
     // The initial thread state of the interpreter is allocated
     // as part of the interpreter state so should not be freed.
-    if (tstate != &tstate->interp->_initial_thread) {
+    if (tstate == &tstate->interp->_initial_thread) {
+        // Restore to _PyThreadState_INIT.
+        tstate = &tstate->interp->_initial_thread;
+        memcpy(tstate,
+               &initial._main_interpreter._initial_thread,
+               sizeof(*tstate));
+    }
+    else {
         PyMem_RawFree(tstate);
     }
 }
@@ -1320,7 +1400,7 @@ free_threadstate(PyThreadState *tstate)
 
 static void
 init_threadstate(PyThreadState *tstate,
-                 PyInterpreterState *interp, uint64_t id)
+                 PyInterpreterState *interp, uint64_t id, int whence)
 {
     if (tstate->_status.initialized) {
         Py_FatalError("thread state already initialized");
@@ -1333,6 +1413,10 @@ init_threadstate(PyThreadState *tstate,
     assert(tstate->next == NULL);
     assert(tstate->prev == NULL);
 
+    assert(tstate->_whence == _PyThreadState_WHENCE_NOTSET);
+    assert(whence >= 0 && whence <= _PyThreadState_WHENCE_EXEC);
+    tstate->_whence = whence;
+
     assert(id > 0);
     tstate->id = id;
 
@@ -1362,8 +1446,6 @@ add_threadstate(PyInterpreterState *interp, PyThreadState *tstate,
                 PyThreadState *next)
 {
     assert(interp->threads.head != tstate);
-    assert((next != NULL && tstate->id != 1) ||
-           (next == NULL && tstate->id == 1));
     if (next != NULL) {
         assert(next->prev == NULL || next->prev == tstate);
         next->prev = tstate;
@@ -1374,7 +1456,7 @@ add_threadstate(PyInterpreterState *interp, PyThreadState *tstate,
 }
 
 static PyThreadState *
-new_threadstate(PyInterpreterState *interp)
+new_threadstate(PyInterpreterState *interp, int whence)
 {
     PyThreadState *tstate;
     _PyRuntimeState *runtime = interp->runtime;
@@ -1397,10 +1479,10 @@ new_threadstate(PyInterpreterState *interp)
     PyThreadState *old_head = interp->threads.head;
     if (old_head == NULL) {
         // It's the interpreter's initial thread state.
-        assert(id == 1);
         used_newtstate = 0;
         tstate = &interp->_initial_thread;
     }
+    // XXX Re-use interp->_initial_thread if not in use?
     else {
         // Every valid interpreter must have at least one thread.
         assert(id > 1);
@@ -1413,7 +1495,7 @@ new_threadstate(PyInterpreterState *interp)
                sizeof(*tstate));
     }
 
-    init_threadstate(tstate, interp, id);
+    init_threadstate(tstate, interp, id, whence);
     add_threadstate(interp, tstate, old_head);
 
     HEAD_UNLOCK(runtime);
@@ -1427,7 +1509,8 @@ new_threadstate(PyInterpreterState *interp)
 PyThreadState *
 PyThreadState_New(PyInterpreterState *interp)
 {
-    PyThreadState *tstate = new_threadstate(interp);
+    PyThreadState *tstate = new_threadstate(interp,
+                                            _PyThreadState_WHENCE_UNKNOWN);
     if (tstate) {
         bind_tstate(tstate);
         // This makes sure there's a gilstate tstate bound
@@ -1441,16 +1524,16 @@ PyThreadState_New(PyInterpreterState *interp)
 
 // This must be followed by a call to _PyThreadState_Bind();
 PyThreadState *
-_PyThreadState_New(PyInterpreterState *interp)
+_PyThreadState_New(PyInterpreterState *interp, int whence)
 {
-    return new_threadstate(interp);
+    return new_threadstate(interp, whence);
 }
 
 // We keep this for stable ABI compabibility.
 PyAPI_FUNC(PyThreadState*)
 _PyThreadState_Prealloc(PyInterpreterState *interp)
 {
-    return _PyThreadState_New(interp);
+    return _PyThreadState_New(interp, _PyThreadState_WHENCE_UNKNOWN);
 }
 
 // We keep this around for (accidental) stable ABI compatibility.
@@ -1547,6 +1630,12 @@ PyThreadState_Clear(PyThreadState *tstate)
     Py_CLEAR(tstate->context);
 
     if (tstate->on_delete != NULL) {
+        // For the "main" thread of each interpreter, this is meant
+        // to be done in _PyInterpreterState_SetNotRunningMain().
+        // That leaves threads created by the threading module,
+        // and any threads killed by forking.
+        // However, we also accommodate "main" threads that still
+        // don't call _PyInterpreterState_SetNotRunningMain() yet.
         tstate->on_delete(tstate->on_delete_data);
     }
 
@@ -1561,6 +1650,7 @@ static void
 tstate_delete_common(PyThreadState *tstate)
 {
     assert(tstate->_status.cleared && !tstate->_status.finalized);
+    assert(tstate->state != _Py_THREAD_ATTACHED);
 
     PyInterpreterState *interp = tstate->interp;
     if (interp == NULL) {
@@ -1621,6 +1711,7 @@ void
 _PyThreadState_DeleteCurrent(PyThreadState *tstate)
 {
     _Py_EnsureTstateNotNULL(tstate);
+    tstate_set_detached(tstate);
     tstate_delete_common(tstate);
     current_fast_clear(tstate->interp->runtime);
     _PyEval_ReleaseLock(tstate->interp, NULL);
@@ -1777,6 +1868,79 @@ tstate_deactivate(PyThreadState *tstate)
     // It will still be used in PyGILState_Ensure().
 }
 
+static int
+tstate_try_attach(PyThreadState *tstate)
+{  // Move tstate->state from DETACHED to ATTACHED; returns 1 on success, 0 if it was not DETACHED.
+#ifdef Py_NOGIL
+    int expected = _Py_THREAD_DETACHED;
+    if (_Py_atomic_compare_exchange_int(  // only stores ATTACHED if state still equals `expected`
+            &tstate->state,
+            &expected,
+            _Py_THREAD_ATTACHED)) {
+        return 1;
+    }
+    return 0;
+#else
+    assert(tstate->state == _Py_THREAD_DETACHED);  // this branch uses a plain store and always returns 1
+    tstate->state = _Py_THREAD_ATTACHED;
+    return 1;
+#endif
+}
+
+static void
+tstate_set_detached(PyThreadState *tstate)
+{  // Move tstate->state from ATTACHED to DETACHED (atomic store under Py_NOGIL, plain store otherwise).
+    assert(tstate->state == _Py_THREAD_ATTACHED);  // caller must pass an attached thread state
+#ifdef Py_NOGIL
+    _Py_atomic_store_int(&tstate->state, _Py_THREAD_DETACHED);
+#else
+    tstate->state = _Py_THREAD_DETACHED;
+#endif
+}
+
+void
+_PyThreadState_Attach(PyThreadState *tstate)
+{  // Make tstate the current thread state: take the lock, set it current, then mark it ATTACHED.
+#if defined(Py_DEBUG)
+    // This is called from PyEval_RestoreThread(). Similar
+    // to it, we need to ensure errno doesn't change.
+    int err = errno;
+#endif
+
+    _Py_EnsureTstateNotNULL(tstate);
+    if (current_fast_get(&_PyRuntime) != NULL) {  // attaching while another tstate is current is fatal
+        Py_FatalError("non-NULL old thread state");
+    }
+
+    _PyEval_AcquireLock(tstate);  // lock is taken before tstate is published as current
+
+    // XXX assert(tstate_is_alive(tstate));
+    current_fast_set(&_PyRuntime, tstate);
+    tstate_activate(tstate);
+
+    if (!tstate_try_attach(tstate)) {  // fails only if tstate->state was not DETACHED
+        // TODO: Once stop-the-world GC is implemented for --disable-gil builds
+        // this will need to wait until the GC completes. For now, this case
+        // should never happen.
+        Py_FatalError("thread attach failed");
+    }
+
+#if defined(Py_DEBUG)
+    errno = err;
+#endif
+}
+
+void
+_PyThreadState_Detach(PyThreadState *tstate)
+{  // Undo _PyThreadState_Attach(): mark DETACHED, deactivate, unset the current tstate, release the lock.
+    // XXX assert(tstate_is_alive(tstate) && tstate_is_bound(tstate));
+    assert(tstate->state == _Py_THREAD_ATTACHED);
+    assert(tstate == current_fast_get(&_PyRuntime));  // only the current thread state may be detached
+    tstate_set_detached(tstate);
+    tstate_deactivate(tstate);
+    current_fast_clear(&_PyRuntime);
+    _PyEval_ReleaseLock(tstate->interp, tstate);  // lock is released last, after the state is torn down
+}
 
 //----------
 // other API
@@ -1835,7 +1999,7 @@ PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc)
 //---------------------------------
 
 PyThreadState *
-_PyThreadState_UncheckedGet(void)
+PyThreadState_GetUnchecked(void)
 {
     return current_fast_get(&_PyRuntime);
 }
@@ -1849,56 +2013,15 @@ PyThreadState_Get(void)
     return tstate;
 }
 
-
-static void
-_swap_thread_states(_PyRuntimeState *runtime,
-                    PyThreadState *oldts, PyThreadState *newts)
-{
-    // XXX Do this only if oldts != NULL?
-    current_fast_clear(runtime);
-
-    if (oldts != NULL) {
-        // XXX assert(tstate_is_alive(oldts) && tstate_is_bound(oldts));
-        tstate_deactivate(oldts);
-    }
-
-    if (newts != NULL) {
-        // XXX assert(tstate_is_alive(newts));
-        assert(tstate_is_bound(newts));
-        current_fast_set(runtime, newts);
-        tstate_activate(newts);
-    }
-}
-
-PyThreadState *
-_PyThreadState_SwapNoGIL(PyThreadState *newts)
-{
-#if defined(Py_DEBUG)
-    /* This can be called from PyEval_RestoreThread(). Similar
-       to it, we need to ensure errno doesn't change.
-    */
-    int err = errno;
-#endif
-
-    PyThreadState *oldts = current_fast_get(&_PyRuntime);
-    _swap_thread_states(&_PyRuntime, oldts, newts);
-
-#if defined(Py_DEBUG)
-    errno = err;
-#endif
-    return oldts;
-}
-
 PyThreadState *
 _PyThreadState_Swap(_PyRuntimeState *runtime, PyThreadState *newts)
 {
     PyThreadState *oldts = current_fast_get(runtime);
     if (oldts != NULL) {
-        _PyEval_ReleaseLock(oldts->interp, oldts);
+        _PyThreadState_Detach(oldts);
     }
-    _swap_thread_states(runtime, oldts, newts);
     if (newts != NULL) {
-        _PyEval_AcquireLock(newts);
+        _PyThreadState_Attach(newts);
     }
     return oldts;
 }
@@ -2207,7 +2330,9 @@ PyGILState_Ensure(void)
     int has_gil;
     if (tcur == NULL) {
         /* Create a new Python thread state for this thread */
-        tcur = new_threadstate(runtime->gilstate.autoInterpreterState);
+        // XXX Use PyInterpreterState_EnsureThreadState()?
+        tcur = new_threadstate(runtime->gilstate.autoInterpreterState,
+                               _PyThreadState_WHENCE_GILSTATE);
         if (tcur == NULL) {
             Py_FatalError("Couldn't create thread-state for new thread");
         }
@@ -2538,23 +2663,27 @@ _PyCrossInterpreterData_ReleaseAndRawFree(_PyCrossInterpreterData *data)
    crossinterpdatafunc. It would be simpler and more efficient. */
 
 static int
-_xidregistry_add_type(struct _xidregistry *xidregistry, PyTypeObject *cls,
-                 crossinterpdatafunc getdata)
+_xidregistry_add_type(struct _xidregistry *xidregistry,
+                      PyTypeObject *cls, crossinterpdatafunc getdata)
 {
-    // Note that we effectively replace already registered classes
-    // rather than failing.
     struct _xidregitem *newhead = PyMem_RawMalloc(sizeof(struct _xidregitem));
     if (newhead == NULL) {
         return -1;
     }
-    // XXX Assign a callback to clear the entry from the registry?
-    newhead->cls = PyWeakref_NewRef((PyObject *)cls, NULL);
-    if (newhead->cls == NULL) {
-        PyMem_RawFree(newhead);
-        return -1;
+    *newhead = (struct _xidregitem){
+        // We do not keep a reference, to avoid keeping the class alive.
+        .cls = cls,
+        .refcount = 1,
+        .getdata = getdata,
+    };
+    if (cls->tp_flags & Py_TPFLAGS_HEAPTYPE) {
+        // XXX Assign a callback to clear the entry from the registry?
+        newhead->weakref = PyWeakref_NewRef((PyObject *)cls, NULL);
+        if (newhead->weakref == NULL) {
+            PyMem_RawFree(newhead);
+            return -1;
+        }
     }
-    newhead->getdata = getdata;
-    newhead->prev = NULL;
     newhead->next = xidregistry->head;
     if (newhead->next != NULL) {
         newhead->next->prev = newhead;
@@ -2579,39 +2708,77 @@ _xidregistry_remove_entry(struct _xidregistry *xidregistry,
     if (next != NULL) {
         next->prev = entry->prev;
     }
-    Py_DECREF(entry->cls);
+    Py_XDECREF(entry->weakref);
     PyMem_RawFree(entry);
     return next;
 }
 
+static void
+_xidregistry_clear(struct _xidregistry *xidregistry)
+{  // Free every entry of the registry list; this function does not take xidregistry->mutex itself.
+    struct _xidregitem *cur = xidregistry->head;
+    xidregistry->head = NULL;  // unlink the whole list first, then free the nodes one by one
+    while (cur != NULL) {
+        struct _xidregitem *next = cur->next;  // read the link before cur is freed
+        Py_XDECREF(cur->weakref);  // weakref is only created for heap types, so it may be NULL
+        PyMem_RawFree(cur);
+        cur = next;
+    }
+}
+
 static struct _xidregitem *
 _xidregistry_find_type(struct _xidregistry *xidregistry, PyTypeObject *cls)
 {
     struct _xidregitem *cur = xidregistry->head;
     while (cur != NULL) {
-        PyObject *registered = _PyWeakref_GET_REF(cur->cls);
-        if (registered == NULL) {
-            // The weakly ref'ed object was freed.
-            cur = _xidregistry_remove_entry(xidregistry, cur);
-        }
-        else {
-            assert(PyType_Check(registered));
-            if (registered == (PyObject *)cls) {
-                Py_DECREF(registered);
-                return cur;
+        if (cur->weakref != NULL) {
+            // cur is/was a heap type.
+            PyObject *registered = _PyWeakref_GET_REF(cur->weakref);
+            if (registered == NULL) {
+                // The weakly ref'ed object was freed.
+                cur = _xidregistry_remove_entry(xidregistry, cur);
+                continue;
             }
+            assert(PyType_Check(registered));
+            assert(cur->cls == (PyTypeObject *)registered);
+            assert(cur->cls->tp_flags & Py_TPFLAGS_HEAPTYPE);
             Py_DECREF(registered);
-            cur = cur->next;
         }
+        if (cur->cls == cls) {
+            return cur;
+        }
+        cur = cur->next;
     }
     return NULL;
 }
 
+static inline struct _xidregistry *
+_get_xidregistry(PyInterpreterState *interp, PyTypeObject *cls)
+{  // Pick the registry for cls: per-interpreter for heap types, runtime-wide for everything else.
+    struct _xidregistry *xidregistry = &interp->runtime->xidregistry;
+    if (cls->tp_flags & Py_TPFLAGS_HEAPTYPE) {
+        assert(interp->xidregistry.mutex == xidregistry->mutex);  // both registries share one mutex
+        xidregistry = &interp->xidregistry;
+    }
+    return xidregistry;
+}
+
 static void _register_builtins_for_crossinterpreter_data(struct _xidregistry *xidregistry);
 
+static inline void
+_ensure_builtins_xid(PyInterpreterState *interp, struct _xidregistry *xidregistry)
+{  // Lazily register the builtin types; callers in this file invoke it with the registry mutex held.
+    if (xidregistry != &interp->xidregistry) {  // nothing to do for the per-interpreter registry
+        assert(xidregistry == &interp->runtime->xidregistry);
+        if (xidregistry->head == NULL) {  // an empty runtime-wide registry means builtins are not yet added
+            _register_builtins_for_crossinterpreter_data(xidregistry);
+        }
+    }
+}
+
 int
 _PyCrossInterpreterData_RegisterClass(PyTypeObject *cls,
-                                       crossinterpdatafunc getdata)
+                                      crossinterpdatafunc getdata)
 {
     if (!PyType_Check(cls)) {
         PyErr_Format(PyExc_ValueError, "only classes may be registered");
@@ -2622,12 +2789,23 @@ _PyCrossInterpreterData_RegisterClass(PyTypeObject *cls,
         return -1;
     }
 
-    struct _xidregistry *xidregistry = &_PyRuntime.xidregistry ;
+    int res = 0;
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    struct _xidregistry *xidregistry = _get_xidregistry(interp, cls);
     PyThread_acquire_lock(xidregistry->mutex, WAIT_LOCK);
-    if (xidregistry->head == NULL) {
-        _register_builtins_for_crossinterpreter_data(xidregistry);
+
+    _ensure_builtins_xid(interp, xidregistry);
+
+    struct _xidregitem *matched = _xidregistry_find_type(xidregistry, cls);
+    if (matched != NULL) {
+        assert(matched->getdata == getdata);
+        matched->refcount += 1;
+        goto finally;
     }
-    int res = _xidregistry_add_type(xidregistry, cls, getdata);
+
+    res = _xidregistry_add_type(xidregistry, cls, getdata);
+
+finally:
     PyThread_release_lock(xidregistry->mutex);
     return res;
 }
@@ -2636,13 +2814,20 @@ int
 _PyCrossInterpreterData_UnregisterClass(PyTypeObject *cls)
 {
     int res = 0;
-    struct _xidregistry *xidregistry = &_PyRuntime.xidregistry ;
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    struct _xidregistry *xidregistry = _get_xidregistry(interp, cls);
     PyThread_acquire_lock(xidregistry->mutex, WAIT_LOCK);
+
     struct _xidregitem *matched = _xidregistry_find_type(xidregistry, cls);
     if (matched != NULL) {
-        (void)_xidregistry_remove_entry(xidregistry, matched);
+        assert(matched->refcount > 0);
+        matched->refcount -= 1;
+        if (matched->refcount == 0) {
+            (void)_xidregistry_remove_entry(xidregistry, matched);
+        }
         res = 1;
     }
+
     PyThread_release_lock(xidregistry->mutex);
     return res;
 }
@@ -2655,17 +2840,19 @@ _PyCrossInterpreterData_UnregisterClass(PyTypeObject *cls)
 crossinterpdatafunc
 _PyCrossInterpreterData_Lookup(PyObject *obj)
 {
-    struct _xidregistry *xidregistry = &_PyRuntime.xidregistry ;
-    PyObject *cls = PyObject_Type(obj);
+    PyTypeObject *cls = Py_TYPE(obj);  // borrowed reference
+
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    struct _xidregistry *xidregistry = _get_xidregistry(interp, cls);
     PyThread_acquire_lock(xidregistry->mutex, WAIT_LOCK);
-    if (xidregistry->head == NULL) {
-        _register_builtins_for_crossinterpreter_data(xidregistry);
-    }
-    struct _xidregitem *matched = _xidregistry_find_type(xidregistry,
-                                                         (PyTypeObject *)cls);
-    Py_DECREF(cls);
+
+    _ensure_builtins_xid(interp, xidregistry);
+
+    struct _xidregitem *matched = _xidregistry_find_type(xidregistry, cls);
+    crossinterpdatafunc func = matched != NULL ? matched->getdata : NULL;
+
     PyThread_release_lock(xidregistry->mutex);
-    return matched != NULL ? matched->getdata : NULL;
+    return func;  // captured under the lock; NULL if nothing matched
 }
 
 /* cross-interpreter data for builtin types */
@@ -2801,6 +2988,10 @@ _register_builtins_for_crossinterpreter_data(struct _xidregistry *xidregistry)
 }
 
 
+/*************/
+/* Other API */
+/*************/
+
 _PyFrameEvalFunction
 _PyInterpreterState_GetEvalFrameFunc(PyInterpreterState *interp)
 {
diff --git a/Python/specialize.c b/Python/specialize.c
index d9b748cad78f4f..49633b103b3815 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -199,10 +199,6 @@ print_object_stats(FILE *out, ObjectStats *stats)
     fprintf(out, "Object method cache collisions: %" PRIu64 "\n", stats->type_cache_collisions);
     fprintf(out, "Object method cache dunder hits: %" PRIu64 "\n", stats->type_cache_dunder_hits);
     fprintf(out, "Object method cache dunder misses: %" PRIu64 "\n", stats->type_cache_dunder_misses);
-    fprintf(out, "Optimization attempts: %" PRIu64 "\n", stats->optimization_attempts);
-    fprintf(out, "Optimization traces created: %" PRIu64 "\n", stats->optimization_traces_created);
-    fprintf(out, "Optimization traces executed: %" PRIu64 "\n", stats->optimization_traces_executed);
-    fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->optimization_uops_executed);
 }
 
 static void
@@ -215,6 +211,56 @@ print_gc_stats(FILE *out, GCStats *stats)
     }
 }
 
+static void
+print_histogram(FILE *out, const char *name, uint64_t hist[_Py_UOP_HIST_SIZE])
+{  // writes one "name[label]: count" line per bucket of hist
+    for (int i = 0; i < _Py_UOP_HIST_SIZE; i++) {  // bucket i is labelled 2**i
+        fprintf(out, "%s[%" PRIu64"]: %" PRIu64 "\n", name, (uint64_t)1 << i, hist[i]);
+    }
+}
+
+static void
+print_optimization_stats(FILE *out, OptimizationStats *stats)
+{  // scalar counters, then histograms, then per-opcode counts
+    fprintf(out, "Optimization attempts: %" PRIu64 "\n", stats->attempts);
+    fprintf(out, "Optimization traces created: %" PRIu64 "\n", stats->traces_created);
+    fprintf(out, "Optimization traces executed: %" PRIu64 "\n", stats->traces_executed);
+    fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->uops_executed);
+    fprintf(out, "Optimization trace stack overflow: %" PRIu64 "\n", stats->trace_stack_overflow);
+    fprintf(out, "Optimization trace stack underflow: %" PRIu64 "\n", stats->trace_stack_underflow);
+    fprintf(out, "Optimization trace too long: %" PRIu64 "\n", stats->trace_too_long);
+    fprintf(out, "Optimization trace too short: %" PRIu64 "\n", stats->trace_too_short);
+    fprintf(out, "Optimization inner loop: %" PRIu64 "\n", stats->inner_loop);
+    fprintf(out, "Optimization recursive call: %" PRIu64 "\n", stats->recursive_call);
+
+    print_histogram(out, "Trace length", stats->trace_length_hist);
+    print_histogram(out, "Trace run length", stats->trace_run_length_hist);
+    print_histogram(out, "Optimized trace length", stats->optimized_trace_length_hist);
+
+    const char* const* names;  // table used to name entry i
+    for (int i = 0; i < 512; i++) {
+        if (i < 256) {  // below 256: opcode name table
+            names = _PyOpcode_OpName;
+        } else {  // 256 and up: uop name table
+            names = _PyOpcode_uop_name;
+        }
+        if (stats->opcode[i].execution_count) {  // skip zero counts
+            fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", names[i], stats->opcode[i].execution_count);
+        }
+    }
+
+    for (int i = 0; i < 256; i++) {
+        if (stats->unsupported_opcode[i]) {  // skip zero counts
+            fprintf(
+                out,
+                "unsupported_opcode[%s].count : %" PRIu64 "\n",
+                _PyOpcode_OpName[i],
+                stats->unsupported_opcode[i]
+            );
+        }
+    }
+}
+
 static void
 print_stats(FILE *out, PyStats *stats)
 {
@@ -222,6 +268,7 @@ print_stats(FILE *out, PyStats *stats)
     print_call_stats(out, &stats->call_stats);
     print_object_stats(out, &stats->object_stats);
     print_gc_stats(out, stats->gc_stats);
+    print_optimization_stats(out, &stats->optimization_stats);
 }
 
 void
diff --git a/Python/stdlib_module_names.h b/Python/stdlib_module_names.h
index 13b1764f0886d1..701bfc35cc8182 100644
--- a/Python/stdlib_module_names.h
+++ b/Python/stdlib_module_names.h
@@ -77,6 +77,7 @@ static const char* _Py_stdlib_module_names[] = {
 "_strptime",
 "_struct",
 "_symtable",
+"_sysconfig",
 "_thread",
 "_threading_local",
 "_tkinter",
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 7ba7be10aacb92..b00301765e1890 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -40,7 +40,9 @@ Data members:
 #include "osdefs.h"               // DELIM
 #include "stdlib_module_names.h"  // _Py_stdlib_module_names
 
-#include <locale.h>
+#ifdef HAVE_UNISTD_H
+#  include <unistd.h>             // getpid()
+#endif
 
 #ifdef MS_WINDOWS
 #  define WIN32_LEAN_AND_MEAN
diff --git a/Python/thread.c b/Python/thread.c
index 1ac2db2937e373..bf207cecb90505 100644
--- a/Python/thread.c
+++ b/Python/thread.c
@@ -8,7 +8,7 @@
 #include "Python.h"
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
 #include "pycore_structseq.h"     // _PyStructSequence_FiniBuiltin()
-#include "pycore_pythread.h"
+#include "pycore_pythread.h"      // _POSIX_THREADS
 
 #ifndef DONT_HAVE_STDIO_H
 #  include <stdio.h>
@@ -17,6 +17,26 @@
 #include <stdlib.h>
 
 
+// Define PY_TIMEOUT_MAX constant.
+#ifdef _POSIX_THREADS
+   // PyThread_acquire_lock_timed() uses _PyTime_FromNanoseconds(us * 1000)
+   // to convert microseconds to nanoseconds.
+#  define PY_TIMEOUT_MAX_VALUE (LLONG_MAX / 1000)
+#elif defined (NT_THREADS)
+   // WaitForSingleObject() accepts timeout in milliseconds in the range
+   // [0; 0xFFFFFFFE] (DWORD type). INFINITE value (0xFFFFFFFF) means no
+   // timeout. 0xFFFFFFFE milliseconds is around 49.7 days.
+#  if 0xFFFFFFFELL < LLONG_MAX / 1000
+#    define PY_TIMEOUT_MAX_VALUE (0xFFFFFFFELL * 1000)
+#  else
+#    define PY_TIMEOUT_MAX_VALUE LLONG_MAX
+#  endif
+#else
+#  define PY_TIMEOUT_MAX_VALUE LLONG_MAX
+#endif
+const long long PY_TIMEOUT_MAX = PY_TIMEOUT_MAX_VALUE;
+
+
 static void PyThread__init_thread(void); /* Forward */
 
 #define initialized _PyRuntime.threads.initialized
diff --git a/Python/thread_pthread.h b/Python/thread_pthread.h
index f96c57da64636d..76a1f7763f23b9 100644
--- a/Python/thread_pthread.h
+++ b/Python/thread_pthread.h
@@ -1,4 +1,5 @@
-#include "pycore_interp.h"    // _PyInterpreterState.threads.stacksize
+#include "pycore_interp.h"        // _PyInterpreterState.threads.stacksize
+#include "pycore_pythread.h"      // _POSIX_SEMAPHORES
 
 /* Posix threads interface */
 
@@ -84,10 +85,10 @@
 /* On FreeBSD 4.x, _POSIX_SEMAPHORES is defined empty, so
    we need to add 0 to make it work there as well. */
 #if (_POSIX_SEMAPHORES+0) == -1
-#define HAVE_BROKEN_POSIX_SEMAPHORES
+#  define HAVE_BROKEN_POSIX_SEMAPHORES
 #else
-#include <semaphore.h>
-#include <errno.h>
+#  include <semaphore.h>
+#  include <errno.h>
 #endif
 #endif
 
diff --git a/Python/traceback.c b/Python/traceback.c
index 7e791d0a59bd82..5de1bff9943c6c 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -20,13 +20,14 @@
 #include "frameobject.h"          // PyFrame_New()
 
 #include "osdefs.h"               // SEP
-#ifdef HAVE_FCNTL_H
-#  include <fcntl.h>
+#ifdef HAVE_UNISTD_H
+#  include <unistd.h>             // lseek()
 #endif
 
-#define OFF(x) offsetof(PyTracebackObject, x)
 
+#define OFF(x) offsetof(PyTracebackObject, x)
 #define PUTS(fd, str) (void)_Py_write_noraise(fd, str, (int)strlen(str))
+
 #define MAX_STRING_LENGTH 500
 #define MAX_FRAME_DEPTH 100
 #define MAX_NTHREADS 100
diff --git a/Tools/build/smelly.py b/Tools/build/smelly.py
index 276a5ab2cc84c6..ab345307ff9b64 100755
--- a/Tools/build/smelly.py
+++ b/Tools/build/smelly.py
@@ -11,6 +11,11 @@
 if sys.platform == 'darwin':
     ALLOWED_PREFIXES += ('__Py',)
 
+# "Legacy": some old symbols are prefixed by "PY_".
+EXCEPTIONS = frozenset({
+    'PY_TIMEOUT_MAX',
+})
+
 IGNORED_EXTENSION = "_ctypes_test"
 # Ignore constructor and destructor functions
 IGNORED_SYMBOLS = {'_init', '_fini'}
@@ -72,7 +77,7 @@ def get_smelly_symbols(stdout):
         symbol = parts[-1]
         result = '%s (type: %s)' % (symbol, symtype)
 
-        if symbol.startswith(ALLOWED_PREFIXES):
+        if symbol.startswith(ALLOWED_PREFIXES) or symbol in EXCEPTIONS:
             python_symbols.append(result)
             continue
 
diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv
index 1f398701a7a5b5..f9911643332b5e 100644
--- a/Tools/c-analyzer/cpython/ignored.tsv
+++ b/Tools/c-analyzer/cpython/ignored.tsv
@@ -88,6 +88,10 @@ Parser/myreadline.c	-	PyOS_ReadlineFunctionPointer	-
 Python/initconfig.c	-	_Py_StandardStreamEncoding	-
 Python/initconfig.c	-	_Py_StandardStreamErrors	-
 
+# Internal constant list
+Python/initconfig.c	-	PYCONFIG_SPEC	-
+
+
 ##-----------------------
 ## public C-API
 
@@ -603,6 +607,7 @@ Modules/_xxtestfuzz/fuzzer.c	LLVMFuzzerTestOneInput	AST_LITERAL_EVAL_INITIALIZED
 # XXX Fix the analyzer.
 
 ## forward/extern references
+Include/internal/pycore_importdl.h	-	_PyImport_DynLoadFiletab	-
 Include/py_curses.h	-	PyCurses_API	-
 Include/pydecimal.h	-	_decimal_api	-
 Modules/_blake2/blake2module.c	-	blake2b_type_spec	-
@@ -664,7 +669,6 @@ Objects/object.c	-	_PyLineIterator	-
 Objects/object.c	-	_PyPositionsIterator	-
 Python/perf_trampoline.c	-	_Py_trampoline_func_start	-
 Python/perf_trampoline.c	-	_Py_trampoline_func_end	-
-Python/importdl.h	-	_PyImport_DynLoadFiletab	-
 Modules/expat/xmlrole.c	-	prolog0	-
 Modules/expat/xmlrole.c	-	prolog1	-
 Modules/expat/xmlrole.c	-	prolog2	-
diff --git a/Tools/cases_generator/analysis.py b/Tools/cases_generator/analysis.py
index 91dcba8ceee13d..7bbc924e5083f1 100644
--- a/Tools/cases_generator/analysis.py
+++ b/Tools/cases_generator/analysis.py
@@ -26,7 +26,7 @@
     "co_names": "Use FRAME_CO_NAMES.",
 }
 
-RE_PREDICTED = r"^\s*(?:GO_TO_INSTRUCTION\(|DEOPT_IF\(.*?,\s*)(\w+)\);\s*(?://.*)?$"
+RE_GO_TO_INSTR = r"^\s*GO_TO_INSTRUCTION\((\w+)\);\s*(?://.*)?$"
 
 
 class Analyzer:
@@ -187,16 +187,23 @@ def analyze(self) -> None:
         Raises SystemExit if there is an error.
         """
         self.analyze_macros_and_pseudos()
-        self.find_predictions()
         self.map_families()
+        self.mark_predictions()
         self.check_families()
 
-    def find_predictions(self) -> None:
-        """Find the instructions that need PREDICTED() labels."""
+    def mark_predictions(self) -> None:
+        """Mark the instructions that need PREDICTED() labels."""
+        # Start with family heads
+        for family in self.families.values():
+            if family.name in self.instrs:
+                self.instrs[family.name].predicted = True
+            if family.name in self.macro_instrs:
+                self.macro_instrs[family.name].predicted = True
+        # Also look for GO_TO_INSTRUCTION() calls
         for instr in self.instrs.values():
             targets: set[str] = set()
             for line in instr.block_text:
-                if m := re.match(RE_PREDICTED, line):
+                if m := re.match(RE_GO_TO_INSTR, line):
                     targets.add(m.group(1))
             for target in targets:
                 if target_instr := self.instrs.get(target):
@@ -225,11 +232,18 @@ def map_families(self) -> None:
                         )
                     else:
                         member_instr.family = family
-                elif not self.macro_instrs.get(member):
+                if member_mac := self.macro_instrs.get(member):
+                    assert member_mac.family is None, (member, member_mac.family.name)
+                    member_mac.family = family
+                if not member_instr and not member_mac:
                     self.error(
                         f"Unknown instruction {member!r} referenced in family {family.name!r}",
                         family,
                     )
+        # A sanctioned exception:
+        # This opcode is a member of the family but it doesn't pass the checks.
+        if mac := self.macro_instrs.get("BINARY_OP_INPLACE_ADD_UNICODE"):
+            mac.family = self.families.get("BINARY_OP")
 
     def check_families(self) -> None:
         """Check each family:
diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py
index 9192d1038ab7d6..01ab83bedb2985 100644
--- a/Tools/cases_generator/generate_cases.py
+++ b/Tools/cases_generator/generate_cases.py
@@ -781,9 +781,7 @@ def write_instructions(
                     case parsing.Macro():
                         n_macros += 1
                         mac = self.macro_instrs[thing.name]
-                        stacking.write_macro_instr(
-                            mac, self.out, self.families.get(mac.name)
-                        )
+                        stacking.write_macro_instr(mac, self.out)
                     case parsing.Pseudo():
                         pass
                     case _:
diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py
index 6fbf7d93f42fde..bd7b7dfbaa8f70 100644
--- a/Tools/cases_generator/instructions.py
+++ b/Tools/cases_generator/instructions.py
@@ -144,7 +144,8 @@ def write_body(
         out: Formatter,
         dedent: int,
         active_caches: list[ActiveCacheEffect],
-        tier: Tiers = TIER_ONE,
+        tier: Tiers,
+        family: parsing.Family | None,
     ) -> None:
         """Write the instruction body."""
         # Write cache effect variable declarations and initializations
@@ -207,6 +208,16 @@ def write_body(
                     )
                 else:
                     out.write_raw(f"{space}if ({cond}) goto {label};\n")
+            elif m := re.match(r"(\s*)DEOPT_IF\((.+)\);\s*(?://.*)?$", line):
+                space, cond = m.groups()
+                space = extra + space
+                target = family.name if family else self.name
+                out.write_raw(f"{space}DEOPT_IF({cond}, {target});\n")
+            elif "DEOPT" in line:
+                filename = context.owner.filename
+                lineno = context.owner.tokens[context.begin].line
+                print(f"{filename}:{lineno}: ERROR: DEOPT_IF() must be all on one line")
+                out.write_raw(extra + line)
             elif m := re.match(r"(\s*)DECREF_INPUTS\(\);\s*(?://.*)?$", line):
                 out.reset_lineno()
                 space = extra + m.group(1)
@@ -244,7 +255,8 @@ def write_body(
         out: Formatter,
         dedent: int,
         active_caches: list[ActiveCacheEffect],
-        tier: Tiers = TIER_ONE,
+        tier: Tiers,
+        family: parsing.Family | None,
     ) -> None:
         pass
 
@@ -268,7 +280,9 @@ class MacroInstruction:
     macro: parsing.Macro
     parts: MacroParts
     cache_offset: int
+    # Set later
     predicted: bool = False
+    family: parsing.Family | None = None
 
 
 @dataclasses.dataclass
diff --git a/Tools/cases_generator/stacking.py b/Tools/cases_generator/stacking.py
index 1f9fda66a5f034..bba2db8b059da8 100644
--- a/Tools/cases_generator/stacking.py
+++ b/Tools/cases_generator/stacking.py
@@ -351,14 +351,13 @@ def write_single_instr(
             out,
             tier,
             0,
+            instr.family,
         )
     except AssertionError as err:
         raise AssertionError(f"Error writing instruction {instr.name}") from err
 
 
-def write_macro_instr(
-    mac: MacroInstruction, out: Formatter, family: Family | None
-) -> None:
+def write_macro_instr(mac: MacroInstruction, out: Formatter) -> None:
     parts = [
         part
         for part in mac.parts
@@ -368,9 +367,11 @@ def write_macro_instr(
     with out.block(f"TARGET({mac.name})"):
         if mac.predicted:
             out.emit(f"PREDICTED({mac.name});")
-        out.static_assert_family_size(mac.name, family, mac.cache_offset)
+        out.static_assert_family_size(mac.name, mac.family, mac.cache_offset)
         try:
-            next_instr_is_set = write_components(parts, out, TIER_ONE, mac.cache_offset)
+            next_instr_is_set = write_components(
+                parts, out, TIER_ONE, mac.cache_offset, mac.family
+            )
         except AssertionError as err:
             raise AssertionError(f"Error writing macro {mac.name}") from err
         if not parts[-1].instr.always_exits:
@@ -386,6 +387,7 @@ def write_components(
     out: Formatter,
     tier: Tiers,
     cache_offset: int,
+    family: Family | None,
 ) -> bool:
     managers = get_managers(parts)
 
@@ -454,10 +456,10 @@ def write_components(
                 assert_no_pokes(managers)
 
         if len(parts) == 1:
-            mgr.instr.write_body(out, 0, mgr.active_caches, tier)
+            mgr.instr.write_body(out, 0, mgr.active_caches, tier, family)
         else:
             with out.block(""):
-                mgr.instr.write_body(out, -4, mgr.active_caches, tier)
+                mgr.instr.write_body(out, -4, mgr.active_caches, tier, family)
 
         if mgr is managers[-1] and not next_instr_is_set and not mgr.instr.always_exits:
             # Adjust the stack to its final depth, *then* write the
diff --git a/Tools/freeze/test/freeze.py b/Tools/freeze/test/freeze.py
index cdf77c57bbb6ae..9030ad4d4e5f93 100644
--- a/Tools/freeze/test/freeze.py
+++ b/Tools/freeze/test/freeze.py
@@ -130,7 +130,7 @@ def prepare(script=None, outdir=None):
     if not MAKE:
         raise UnsupportedError('make')
 
-    cores = os.cpu_count()
+    cores = os.process_cpu_count()
     if cores and cores >= 3:
         # this test is most often run as part of the whole suite with a lot
         # of other tests running in parallel, from 1-2 vCPU systems up to
diff --git a/Tools/msi/test/test_files.wxs b/Tools/msi/test/test_files.wxs
index 87e164cb6759f6..bb9b258692a62f 100644
--- a/Tools/msi/test/test_files.wxs
+++ b/Tools/msi/test/test_files.wxs
@@ -1,41 +1,41 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <Wix xmlns="http://schemas.microsoft.com/wix/2006/wi">
-    <?define exts=_testcapi;_ctypes_test;_testbuffer;_testimportmultiple;_testmultiphase;_testsinglephase;_testconsole;_testinternalcapi;_testclinic ?>
+    <?define exts=_testcapi;_ctypes_test;_testbuffer;_testimportmultiple;_testmultiphase;_testsinglephase;_testconsole;_testinternalcapi;_testclinic;_testclinic_limited ?>
     <Fragment>
         <ComponentGroup Id="test_extensions">
             <?foreach ext in $(var.exts)?>
-        
+
             <Component Id="$(var.ext).pyd" Directory="DLLs" Guid="*">
                 <File Name="$(var.ext).pyd" KeyPath="yes" />
             </Component>
-            
+
             <?endforeach ?>
         </ComponentGroup>
     </Fragment>
-    
+
     <Fragment>
         <ComponentGroup Id="test_extensions_symbols">
             <?foreach ext in $(var.exts)?>
-            
+
             <Component Id="$(var.ext).pdb" Directory="DLLs" Guid="*">
                 <File Name="$(var.ext).pdb" />
             </Component>
-            
+
             <?endforeach ?>
         </ComponentGroup>
     </Fragment>
-    
+
     <Fragment>
         <ComponentGroup Id="test_extensions_d">
             <?foreach ext in $(var.exts)?>
-            
+
             <Component Id="$(var.ext)_d.pyd" Directory="DLLs" Guid="*">
                 <File Name="$(var.ext)_d.pyd" />
             </Component>
             <Component Id="$(var.ext)_d.pdb" Directory="DLLs" Guid="*">
                 <File Name="$(var.ext)_d.pdb" />
             </Component>
-            
+
             <?endforeach ?>
         </ComponentGroup>
     </Fragment>
diff --git a/Tools/requirements-dev.txt b/Tools/requirements-dev.txt
index 35bceb205e8a9b..add28b1bb38183 100644
--- a/Tools/requirements-dev.txt
+++ b/Tools/requirements-dev.txt
@@ -4,4 +4,4 @@ mypy==1.5.1
 
 # needed for peg_generator:
 types-psutil==5.9.5.16
-types-setuptools==68.1.0.1
+types-setuptools==68.2.0.0
diff --git a/Tools/requirements-hypothesis.txt b/Tools/requirements-hypothesis.txt
index 9db2b74c87cfb0..b95300a07dd2b4 100644
--- a/Tools/requirements-hypothesis.txt
+++ b/Tools/requirements-hypothesis.txt
@@ -1,4 +1,4 @@
 # Requirements file for hypothesis that
 # we use to run our property-based tests in CI.
 
-hypothesis==6.84.0
+hypothesis==6.87.1
diff --git a/Tools/scripts/run_tests.py b/Tools/scripts/run_tests.py
deleted file mode 100644
index 3e3d15d3b0da5c..00000000000000
--- a/Tools/scripts/run_tests.py
+++ /dev/null
@@ -1,78 +0,0 @@
-"""Run Python's test suite in a fast, rigorous way.
-
-The defaults are meant to be reasonably thorough, while skipping certain
-tests that can be time-consuming or resource-intensive (e.g. largefile),
-or distracting (e.g. audio and gui). These defaults can be overridden by
-simply passing a -u option to this script.
-
-"""
-
-import os
-import shlex
-import sys
-import sysconfig
-import test.support
-
-
-def is_multiprocess_flag(arg):
-    return arg.startswith('-j') or arg.startswith('--multiprocess')
-
-
-def is_python_flag(arg):
-    return arg.startswith('-p') or arg.startswith('--python')
-
-
-def main(regrtest_args):
-    args = [sys.executable]
-
-    cross_compile = '_PYTHON_HOST_PLATFORM' in os.environ
-    if (hostrunner := os.environ.get("_PYTHON_HOSTRUNNER")) is None:
-        hostrunner = sysconfig.get_config_var("HOSTRUNNER")
-    if cross_compile:
-        # emulate -E, but keep PYTHONPATH + cross compile env vars, so
-        # test executable can load correct sysconfigdata file.
-        keep = {
-            '_PYTHON_PROJECT_BASE',
-            '_PYTHON_HOST_PLATFORM',
-            '_PYTHON_SYSCONFIGDATA_NAME',
-            'PYTHONPATH'
-        }
-        environ = {
-            name: value for name, value in os.environ.items()
-            if not name.startswith(('PYTHON', '_PYTHON')) or name in keep
-        }
-    else:
-        environ = os.environ.copy()
-
-    # Allow user-specified interpreter options to override our defaults.
-    args.extend(test.support.args_from_interpreter_flags())
-
-    args.extend(['-m', 'test',    # Run the test suite
-                 '--fast-ci',     # Fast Continuous Integration mode
-                 ])
-    if not any(is_multiprocess_flag(arg) for arg in regrtest_args):
-        if cross_compile and hostrunner:
-            # For now use only two cores for cross-compiled builds;
-            # hostrunner can be expensive.
-            args.extend(['-j', '2'])
-
-    if cross_compile and hostrunner:
-        # If HOSTRUNNER is set and -p/--python option is not given, then
-        # use hostrunner to execute python binary for tests.
-        if not any(is_python_flag(arg) for arg in regrtest_args):
-            buildpython = sysconfig.get_config_var("BUILDPYTHON")
-            args.extend(["--python", f"{hostrunner} {buildpython}"])
-
-    args.extend(regrtest_args)
-
-    print(shlex.join(args), flush=True)
-
-    if sys.platform == 'win32':
-        from subprocess import call
-        sys.exit(call(args))
-    else:
-        os.execve(sys.executable, args, environ)
-
-
-if __name__ == '__main__':
-    main(sys.argv[1:])
diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py
index 3b2bdd8015be4a..bdca51df3dac53 100644
--- a/Tools/scripts/summarize_stats.py
+++ b/Tools/scripts/summarize_stats.py
@@ -13,6 +13,7 @@
 from datetime import date
 import itertools
 import sys
+import re
 
 if os.name == "nt":
     DEFAULT_DIR = "c:\\temp\\py_stats\\"
@@ -21,6 +22,7 @@
 
 TOTAL = "specialization.hit", "specialization.miss", "execution_count"
 
+
 def format_ratio(num, den):
     """
     Format a ratio as a percentage. When the denominator is 0, returns the empty
@@ -31,6 +33,7 @@ def format_ratio(num, den):
     else:
         return f"{num/den:.01%}"
 
+
 def percentage_to_float(s):
     """
     Converts a percentage string to a float.  The empty string is returned as 0.0
@@ -41,6 +44,7 @@ def percentage_to_float(s):
         assert s[-1] == "%"
         return float(s[:-1])
 
+
 def join_rows(a_rows, b_rows):
     """
     Joins two tables together, side-by-side, where the first column in each is a
@@ -79,40 +83,53 @@ def join_rows(a_rows, b_rows):
     keys = list(a_data.keys()) + [k for k in b_data.keys() if k not in a_data]
     return [(k, *a_data.get(k, default), *b_data.get(k, default)) for k in keys]
 
+
 def calculate_specialization_stats(family_stats, total):
     rows = []
     for key in sorted(family_stats):
         if key.startswith("specialization.failure_kinds"):
             continue
         if key in ("specialization.hit", "specialization.miss"):
-            label = key[len("specialization."):]
+            label = key[len("specialization.") :]
         elif key == "execution_count":
             continue
-        elif key in ("specialization.success",  "specialization.failure", "specializable"):
+        elif key in (
+            "specialization.success",
+            "specialization.failure",
+            "specializable",
+        ):
             continue
         elif key.startswith("pair"):
             continue
         else:
             label = key
-        rows.append((f"{label:>12}", f"{family_stats[key]:>12}", format_ratio(family_stats[key], total)))
+        rows.append(
+            (
+                f"{label:>12}",
+                f"{family_stats[key]:>12}",
+                format_ratio(family_stats[key], total),
+            )
+        )
     return rows
 
+
 def calculate_specialization_success_failure(family_stats):
     total_attempts = 0
-    for key in ("specialization.success",  "specialization.failure"):
+    for key in ("specialization.success", "specialization.failure"):
         total_attempts += family_stats.get(key, 0)
     rows = []
     if total_attempts:
-        for key in ("specialization.success",  "specialization.failure"):
-            label = key[len("specialization."):]
+        for key in ("specialization.success", "specialization.failure"):
+            label = key[len("specialization.") :]
             label = label[0].upper() + label[1:]
             val = family_stats.get(key, 0)
             rows.append((label, val, format_ratio(val, total_attempts)))
     return rows
 
+
 def calculate_specialization_failure_kinds(name, family_stats, defines):
     total_failures = family_stats.get("specialization.failure", 0)
-    failure_kinds = [ 0 ] * 40
+    failure_kinds = [0] * 40
     for key in family_stats:
         if not key.startswith("specialization.failure_kind"):
             continue
@@ -125,9 +142,16 @@ def calculate_specialization_failure_kinds(name, family_stats, defines):
     for value, index in failures:
         if not value:
             continue
-        rows.append((kind_to_text(index, defines, name), value, format_ratio(value, total_failures)))
+        rows.append(
+            (
+                kind_to_text(index, defines, name),
+                value,
+                format_ratio(value, total_failures),
+            )
+        )
     return rows
 
+
 def print_specialization_stats(name, family_stats, defines):
     if "specializable" not in family_stats:
         return
@@ -144,7 +168,10 @@ def print_specialization_stats(name, family_stats, defines):
             rows = calculate_specialization_failure_kinds(name, family_stats, defines)
             emit_table(("Failure kind", "Count:", "Ratio:"), rows)
 
-def print_comparative_specialization_stats(name, base_family_stats, head_family_stats, defines):
+
+def print_comparative_specialization_stats(
+    name, base_family_stats, head_family_stats, defines
+):
     if "specializable" not in base_family_stats:
         return
 
@@ -157,21 +184,34 @@ def print_comparative_specialization_stats(name, base_family_stats, head_family_
         head_rows = calculate_specialization_stats(head_family_stats, head_total)
         emit_table(
             ("Kind", "Base Count", "Base Ratio", "Head Count", "Head Ratio"),
-            join_rows(base_rows, head_rows)
+            join_rows(base_rows, head_rows),
         )
         base_rows = calculate_specialization_success_failure(base_family_stats)
         head_rows = calculate_specialization_success_failure(head_family_stats)
         rows = join_rows(base_rows, head_rows)
         if rows:
             print_title("Specialization attempts", 4)
-            emit_table(("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"), rows)
-            base_rows = calculate_specialization_failure_kinds(name, base_family_stats, defines)
-            head_rows = calculate_specialization_failure_kinds(name, head_family_stats, defines)
             emit_table(
-                ("Failure kind", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"),
-                join_rows(base_rows, head_rows)
+                ("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"), rows
+            )
+            base_rows = calculate_specialization_failure_kinds(
+                name, base_family_stats, defines
+            )
+            head_rows = calculate_specialization_failure_kinds(
+                name, head_family_stats, defines
+            )
+            emit_table(
+                (
+                    "Failure kind",
+                    "Base Count:",
+                    "Base Ratio:",
+                    "Head Count:",
+                    "Head Ratio:",
+                ),
+                join_rows(base_rows, head_rows),
             )
 
+
 def gather_stats(input):
     # Note the output of this function must be JSON-serializable
 
@@ -179,7 +219,9 @@ def gather_stats(input):
         with open(input, "r") as fd:
             stats = json.load(fd)
 
-        stats["_stats_defines"] = {int(k): v for k, v in stats["_stats_defines"].items()}
+        stats["_stats_defines"] = {
+            int(k): v for k, v in stats["_stats_defines"].items()
+        }
         stats["_defines"] = {int(k): v for k, v in stats["_defines"].items()}
         return stats
 
@@ -191,18 +233,20 @@ def gather_stats(input):
                     try:
                         key, value = line.split(":")
                     except ValueError:
-                        print(f"Unparsable line: '{line.strip()}' in  {filename}", file=sys.stderr)
+                        print(
+                            f"Unparsable line: '{line.strip()}' in  {filename}",
+                            file=sys.stderr,
+                        )
                         continue
                     key = key.strip()
                     value = int(value)
                     stats[key] += value
-            stats['__nfiles__'] += 1
+            stats["__nfiles__"] += 1
 
         import opcode
 
         stats["_specialized_instructions"] = [
-            op for op in opcode._specialized_opmap.keys()
-            if "__" not in op
+            op for op in opcode._specialized_opmap.keys() if "__" not in op
         ]
         stats["_stats_defines"] = get_stats_defines()
         stats["_defines"] = get_defines()
@@ -211,15 +255,17 @@ def gather_stats(input):
     else:
         raise ValueError(f"{input:r} is not a file or directory path")
 
-def extract_opcode_stats(stats):
+
+def extract_opcode_stats(stats, prefix):
     opcode_stats = collections.defaultdict(dict)
     for key, value in stats.items():
-        if not key.startswith("opcode"):
+        if not key.startswith(prefix):
             continue
-        name, _, rest = key[7:].partition("]")
+        name, _, rest = key[len(prefix) + 1 :].partition("]")
         opcode_stats[name][rest.strip(".")] = value
     return opcode_stats
 
+
 def parse_kinds(spec_src, prefix="SPEC_FAIL"):
     defines = collections.defaultdict(list)
     start = "#define " + prefix + "_"
@@ -227,14 +273,16 @@ def parse_kinds(spec_src, prefix="SPEC_FAIL"):
         line = line.strip()
         if not line.startswith(start):
             continue
-        line = line[len(start):]
+        line = line[len(start) :]
         name, val = line.split()
         defines[int(val.strip())].append(name.strip())
     return defines
 
+
 def pretty(defname):
     return defname.replace("_", " ").lower()
 
+
 def kind_to_text(kind, defines, opname):
     if kind <= 8:
         return pretty(defines[kind][0])
@@ -248,9 +296,10 @@ def kind_to_text(kind, defines, opname):
         opname = "SUBSCR"
     for name in defines[kind]:
         if name.startswith(opname):
-            return pretty(name[len(opname)+1:])
+            return pretty(name[len(opname) + 1 :])
     return "kind " + str(kind)
 
+
 def categorized_counts(opcode_stats, specialized_instructions):
     basic = 0
     specialized = 0
@@ -258,7 +307,7 @@ def categorized_counts(opcode_stats, specialized_instructions):
     for name, opcode_stat in opcode_stats.items():
         if "execution_count" not in opcode_stat:
             continue
-        count = opcode_stat['execution_count']
+        count = opcode_stat["execution_count"]
         if "specializable" in opcode_stat:
             not_specialized += count
         elif name in specialized_instructions:
@@ -269,12 +318,13 @@ def categorized_counts(opcode_stats, specialized_instructions):
             basic += count
     return basic, not_specialized, specialized
 
+
 def print_title(name, level=2):
-    print("#"*level, name)
+    print("#" * level, name)
     print()
 
-class Section:
 
+class Section:
     def __init__(self, title, level=2, summary=None):
         self.title = title
         self.level = level
@@ -295,12 +345,14 @@ def __exit__(*args):
         print("</details>")
         print()
 
+
 def to_str(x):
     if isinstance(x, int):
         return format(x, ",d")
     else:
         return str(x)
 
+
 def emit_table(header, rows):
     width = len(header)
     header_line = "|"
@@ -320,11 +372,28 @@ def emit_table(header, rows):
         print("|", " | ".join(to_str(i) for i in row), "|")
     print()
 
+
+def emit_histogram(title, stats, key, total):
+    """Print a titled table of the bucketed entries in stats that start with key."""
+    rows = []
+    for k, v in stats.items():
+        if k.startswith(key):
+            entry = int(re.match(r".+\[([0-9]+)\]", k).groups()[0])
+            rows.append((f"<= {entry}", int(v), format_ratio(int(v), total)))
+    # Don't include larger buckets with 0 entries, but always keep the first
+    # bucket. Collecting the non-zero positions (instead of reusing a loop
+    # variable) also works when no key matched and rows is empty.
+    nonzero = [i for i, row in enumerate(rows) if row[1] != 0]
+    rows = rows[: (nonzero[-1] + 1) if nonzero else 1]
+    print(f"**{title}**\n")
+    emit_table(("Range", "Count:", "Ratio:"), rows)
+
+
 def calculate_execution_counts(opcode_stats, total):
     counts = []
     for name, opcode_stat in opcode_stats.items():
         if "execution_count" in opcode_stat:
-            count = opcode_stat['execution_count']
+            count = opcode_stat["execution_count"]
             miss = 0
             if "specializable" not in opcode_stat:
                 miss = opcode_stat.get("specialization.miss")
@@ -332,53 +401,61 @@ def calculate_execution_counts(opcode_stats, total):
     counts.sort(reverse=True)
     cumulative = 0
     rows = []
-    for (count, name, miss) in counts:
+    for count, name, miss in counts:
         cumulative += count
         if miss:
             miss = format_ratio(miss, count)
         else:
             miss = ""
-        rows.append((name, count, format_ratio(count, total),
-                     format_ratio(cumulative, total), miss))
+        rows.append(
+            (
+                name,
+                count,
+                format_ratio(count, total),
+                format_ratio(cumulative, total),
+                miss,
+            )
+        )
     return rows
 
+
 def emit_execution_counts(opcode_stats, total):
     with Section("Execution counts", summary="execution counts for all instructions"):
         rows = calculate_execution_counts(opcode_stats, total)
-        emit_table(
-            ("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:"),
-            rows
-        )
+        emit_table(("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:"), rows)
+
+
+def _emit_comparative_execution_counts(base_rows, head_rows):
+    """Print a table comparing per-opcode execution counts between base and head."""
+    base_data = {x[0]: x[1:] for x in base_rows}
+    head_data = {x[0]: x[1:] for x in head_rows}
+    opcodes = base_data.keys() | head_data.keys()
+    rows = []
+    default = [0, "0.0%", "0.0%", 0]  # used when one side lacks the opcode
+    for opcode in opcodes:
+        base_entry = base_data.get(opcode, default)
+        head_entry = head_data.get(opcode, default)
+        if base_entry[0] == 0:
+            change = 1  # no base count to divide by: rendered as 100.0%
+        else:
+            change = (head_entry[0] - base_entry[0]) / base_entry[0]
+        rows.append((opcode, base_entry[0], head_entry[0], f"{change:0.1%}"))
+
+    rows.sort(key=lambda x: abs(percentage_to_float(x[-1])), reverse=True)
+
+    emit_table(("Name", "Base Count:", "Head Count:", "Change:"), rows)
+
 
 def emit_comparative_execution_counts(
-    base_opcode_stats, base_total, head_opcode_stats, head_total
+    base_opcode_stats, base_total, head_opcode_stats, head_total, level=2
 ):
-    with Section("Execution counts", summary="execution counts for all instructions"):
+    with Section(
+        "Execution counts", summary="execution counts for all instructions", level=level
+    ):
         base_rows = calculate_execution_counts(base_opcode_stats, base_total)
         head_rows = calculate_execution_counts(head_opcode_stats, head_total)
-        base_data = dict((x[0], x[1:]) for x in base_rows)
-        head_data = dict((x[0], x[1:]) for x in head_rows)
-        opcodes = set(base_data.keys()) | set(head_data.keys())
-
-        rows = []
-        default = [0, "0.0%", "0.0%", 0]
-        for opcode in opcodes:
-            base_entry = base_data.get(opcode, default)
-            head_entry = head_data.get(opcode, default)
-            if base_entry[0] == 0:
-                change = 1
-            else:
-                change = (head_entry[0] - base_entry[0]) / base_entry[0]
-            rows.append(
-                (opcode, base_entry[0], head_entry[0],
-                 f"{100*change:0.1f}%"))
+        _emit_comparative_execution_counts(base_rows, head_rows)
 
-        rows.sort(key=lambda x: -abs(percentage_to_float(x[-1])))
-
-        emit_table(
-            ("Name", "Base Count:", "Head Count:", "Change:"),
-            rows
-        )
 
 def get_defines():
     spec_path = os.path.join(os.path.dirname(__file__), "../../Python/specialize.c")
@@ -386,12 +463,16 @@ def get_defines():
         defines = parse_kinds(spec_src)
     return defines
 
+
 def emit_specialization_stats(opcode_stats, defines):
     with Section("Specialization stats", summary="specialization stats by family"):
         for name, opcode_stat in opcode_stats.items():
             print_specialization_stats(name, opcode_stat, defines)
 
-def emit_comparative_specialization_stats(base_opcode_stats, head_opcode_stats, defines):
+
+def emit_comparative_specialization_stats(
+    base_opcode_stats, head_opcode_stats, defines
+):
     with Section("Specialization stats", summary="specialization stats by family"):
         opcodes = set(base_opcode_stats.keys()) & set(head_opcode_stats.keys())
         for opcode in opcodes:
@@ -399,6 +480,7 @@ def emit_comparative_specialization_stats(base_opcode_stats, head_opcode_stats,
                 opcode, base_opcode_stats[opcode], head_opcode_stats[opcode], defines
             )
 
+
 def calculate_specialization_effectiveness(
     opcode_stats, total, specialized_instructions
 ):
@@ -411,11 +493,17 @@ def calculate_specialization_effectiveness(
         ("Specialized", specialized, format_ratio(specialized, total)),
     ]
 
+
 def emit_specialization_overview(opcode_stats, total, specialized_instructions):
     with Section("Specialization effectiveness"):
-        rows = calculate_specialization_effectiveness(opcode_stats, total, specialized_instructions)
+        rows = calculate_specialization_effectiveness(
+            opcode_stats, total, specialized_instructions
+        )
         emit_table(("Instructions", "Count:", "Ratio:"), rows)
-        for title, field in (("Deferred", "specialization.deferred"), ("Misses", "specialization.miss")):
+        for title, field in (
+            ("Deferred", "specialization.deferred"),
+            ("Misses", "specialization.miss"),
+        ):
             total = 0
             counts = []
             for name, opcode_stat in opcode_stats.items():
@@ -428,11 +516,19 @@ def emit_specialization_overview(opcode_stats, total, specialized_instructions):
             counts.sort(reverse=True)
             if total:
                 with Section(f"{title} by instruction", 3):
-                    rows = [ (name, count, format_ratio(count, total)) for (count, name) in counts[:10] ]
+                    rows = [
+                        (name, count, format_ratio(count, total))
+                        for (count, name) in counts[:10]
+                    ]
                     emit_table(("Name", "Count:", "Ratio:"), rows)
 
+
 def emit_comparative_specialization_overview(
-    base_opcode_stats, base_total, head_opcode_stats, head_total, specialized_instructions
+    base_opcode_stats,
+    base_total,
+    head_opcode_stats,
+    head_total,
+    specialized_instructions,
 ):
     with Section("Specialization effectiveness"):
         base_rows = calculate_specialization_effectiveness(
@@ -442,16 +538,26 @@ def emit_comparative_specialization_overview(
             head_opcode_stats, head_total, specialized_instructions
         )
         emit_table(
-            ("Instructions", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"),
-            join_rows(base_rows, head_rows)
+            (
+                "Instructions",
+                "Base Count:",
+                "Base Ratio:",
+                "Head Count:",
+                "Head Ratio:",
+            ),
+            join_rows(base_rows, head_rows),
         )
 
+
 def get_stats_defines():
-    stats_path = os.path.join(os.path.dirname(__file__), "../../Include/cpython/pystats.h")
+    stats_path = os.path.join(
+        os.path.dirname(__file__), "../../Include/cpython/pystats.h"
+    )
     with open(stats_path) as stats_src:
         defines = parse_kinds(stats_src, prefix="EVAL_CALL")
     return defines
 
+
 def calculate_call_stats(stats, defines):
     total = 0
     for key, value in stats.items():
@@ -463,7 +569,7 @@ def calculate_call_stats(stats, defines):
             rows.append((key, value, format_ratio(value, total)))
         elif key.startswith("Calls "):
             name, index = key[:-1].split("[")
-            index =  int(index)
+            index = int(index)
             label = name + " (" + pretty(defines[index][0]) + ")"
             rows.append((label, value, format_ratio(value, total)))
     for key, value in stats.items():
@@ -471,11 +577,13 @@ def calculate_call_stats(stats, defines):
             rows.append((key, value, format_ratio(value, total)))
     return rows
 
+
 def emit_call_stats(stats, defines):
     with Section("Call stats", summary="Inlined calls and frame stats"):
         rows = calculate_call_stats(stats, defines)
         emit_table(("", "Count:", "Ratio:"), rows)
 
+
 def emit_comparative_call_stats(base_stats, head_stats, defines):
     with Section("Call stats", summary="Inlined calls and frame stats"):
         base_rows = calculate_call_stats(base_stats, defines)
@@ -483,15 +591,21 @@ def emit_comparative_call_stats(base_stats, head_stats, defines):
         rows = join_rows(base_rows, head_rows)
         rows.sort(key=lambda x: -percentage_to_float(x[-1]))
         emit_table(
-            ("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"),
-            rows
+            ("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"), rows
         )
 
+
 def calculate_object_stats(stats):
     total_materializations = stats.get("Object new values")
-    total_allocations = stats.get("Object allocations") + stats.get("Object allocations from freelist")
-    total_increfs = stats.get("Object interpreter increfs") + stats.get("Object increfs")
-    total_decrefs = stats.get("Object interpreter decrefs") + stats.get("Object decrefs")
+    total_allocations = stats.get("Object allocations") + stats.get(
+        "Object allocations from freelist"
+    )
+    total_increfs = stats.get("Object interpreter increfs") + stats.get(
+        "Object increfs"
+    )
+    total_decrefs = stats.get("Object interpreter decrefs") + stats.get(
+        "Object decrefs"
+    )
     rows = []
     for key, value in stats.items():
         if key.startswith("Object"):
@@ -499,9 +613,9 @@ def calculate_object_stats(stats):
                 ratio = format_ratio(value, total_materializations)
             elif "allocations" in key:
                 ratio = format_ratio(value, total_allocations)
-            elif "increfs"     in key:
+            elif "increfs" in key:
                 ratio = format_ratio(value, total_increfs)
-            elif "decrefs"     in key:
+            elif "decrefs" in key:
                 ratio = format_ratio(value, total_decrefs)
             else:
                 ratio = ""
@@ -510,6 +624,7 @@ def calculate_object_stats(stats):
             rows.append((label, value, ratio))
     return rows
 
+
 def calculate_gc_stats(stats):
     gc_stats = []
     for key, value in stats.items():
@@ -526,40 +641,58 @@ def calculate_gc_stats(stats):
         for (i, gen) in enumerate(gc_stats)
     ]
 
+
 def emit_object_stats(stats):
     with Section("Object stats", summary="allocations, frees and dict materializatons"):
         rows = calculate_object_stats(stats)
-        emit_table(("",  "Count:", "Ratio:"), rows)
+        emit_table(("", "Count:", "Ratio:"), rows)
+
 
 def emit_comparative_object_stats(base_stats, head_stats):
     with Section("Object stats", summary="allocations, frees and dict materializatons"):
         base_rows = calculate_object_stats(base_stats)
         head_rows = calculate_object_stats(head_stats)
-        emit_table(("",  "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"), join_rows(base_rows, head_rows))
+        emit_table(
+            ("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"),
+            join_rows(base_rows, head_rows),
+        )
+
 
 def emit_gc_stats(stats):
     with Section("GC stats", summary="GC collections and effectiveness"):
         rows = calculate_gc_stats(stats)
-        emit_table(("Generation:",  "Collections:", "Objects collected:", "Object visits:"), rows)
+        emit_table(
+            ("Generation:", "Collections:", "Objects collected:", "Object visits:"),
+            rows,
+        )
+
 
 def emit_comparative_gc_stats(base_stats, head_stats):
     with Section("GC stats", summary="GC collections and effectiveness"):
         base_rows = calculate_gc_stats(base_stats)
         head_rows = calculate_gc_stats(head_stats)
         emit_table(
-            ("Generation:",
-            "Base collections:", "Head collections:",
-            "Base objects collected:", "Head objects collected:",
-            "Base object visits:", "Head object visits:"),
-            join_rows(base_rows, head_rows))
+            (
+                "Generation:",
+                "Base collections:",
+                "Head collections:",
+                "Base objects collected:",
+                "Head objects collected:",
+                "Base object visits:",
+                "Head object visits:",
+            ),
+            join_rows(base_rows, head_rows),
+        )
+
 
 def get_total(opcode_stats):
     total = 0
     for opcode_stat in opcode_stats.values():
         if "execution_count" in opcode_stat:
-            total += opcode_stat['execution_count']
+            total += opcode_stat["execution_count"]
     return total
 
+
 def emit_pair_counts(opcode_stats, total):
     pair_counts = []
     for name_i, opcode_stat in opcode_stats.items():
@@ -572,15 +705,22 @@ def emit_pair_counts(opcode_stats, total):
         pair_counts.sort(reverse=True)
         cumulative = 0
         rows = []
-        for (count, pair) in itertools.islice(pair_counts, 100):
+        for count, pair in itertools.islice(pair_counts, 100):
             name_i, name_j = pair
             cumulative += count
-            rows.append((f"{name_i} {name_j}", count, format_ratio(count, total),
-                         format_ratio(cumulative, total)))
-        emit_table(("Pair", "Count:", "Self:", "Cumulative:"),
-            rows
-        )
-    with Section("Predecessor/Successor Pairs", summary="Top 5 predecessors and successors of each opcode"):
+            rows.append(
+                (
+                    f"{name_i} {name_j}",
+                    count,
+                    format_ratio(count, total),
+                    format_ratio(cumulative, total),
+                )
+            )
+        emit_table(("Pair", "Count:", "Self:", "Cumulative:"), rows)
+    with Section(
+        "Predecessor/Successor Pairs",
+        summary="Top 5 predecessors and successors of each opcode",
+    ):
         predecessors = collections.defaultdict(collections.Counter)
         successors = collections.defaultdict(collections.Counter)
         total_predecessors = collections.Counter()
@@ -598,38 +738,137 @@ def emit_pair_counts(opcode_stats, total):
                 continue
             pred_rows = succ_rows = ()
             if total1:
-                pred_rows = [(pred, count, f"{count/total1:.1%}")
-                             for (pred, count) in predecessors[name].most_common(5)]
+                pred_rows = [
+                    (pred, count, f"{count/total1:.1%}")
+                    for (pred, count) in predecessors[name].most_common(5)
+                ]
             if total2:
-                succ_rows = [(succ, count, f"{count/total2:.1%}")
-                             for (succ, count) in successors[name].most_common(5)]
+                succ_rows = [
+                    (succ, count, f"{count/total2:.1%}")
+                    for (succ, count) in successors[name].most_common(5)
+                ]
             with Section(name, 3, f"Successors and predecessors for {name}"):
-                emit_table(("Predecessors", "Count:", "Percentage:"),
-                    pred_rows
-                )
-                emit_table(("Successors", "Count:", "Percentage:"),
-                    succ_rows
-                )
+                emit_table(("Predecessors", "Count:", "Percentage:"), pred_rows)
+                emit_table(("Successors", "Count:", "Percentage:"), succ_rows)
+
+
+def calculate_optimization_stats(stats):
+    """Build the rows of the overall optimization stats table from stats."""
+    attempts = stats["Optimization attempts"]
+    created = stats["Optimization traces created"]
+    executed = stats["Optimization traces executed"]
+    uops = stats["Optimization uops executed"]
+    trace_stack_overflow = stats["Optimization trace stack overflow"]
+    trace_stack_underflow = stats["Optimization trace stack underflow"]
+    trace_too_long = stats["Optimization trace too long"]
+    trace_too_short = stats["Optimiztion trace too short"]  # sic; TODO confirm spelling
+    inner_loop = stats["Optimization inner loop"]
+    recursive_call = stats["Optimization recursive call"]
+    return [
+        ("Optimization attempts", attempts, ""),
+        ("Traces created", created, format_ratio(created, attempts)),
+        ("Traces executed", executed, ""),
+        ("Uops executed", uops, int(uops / (executed or 1))),  # "or 1" avoids /0
+        ("Trace stack overflow", trace_stack_overflow, ""),
+        ("Trace stack underflow", trace_stack_underflow, ""),
+        ("Trace too long", trace_too_long, ""),
+        ("Trace too short", trace_too_short, ""),
+        ("Inner loop found", inner_loop, ""),
+        ("Recursive call", recursive_call, ""),
+    ]
+
+
+def calculate_uop_execution_counts(opcode_stats):
+    """Return (name, count, self, cumulative) rows in descending (count, name) order."""
+    counts = [
+        (stat["execution_count"], uop)
+        for uop, stat in opcode_stats.items()
+        if "execution_count" in stat
+    ]
+    counts.sort(reverse=True)
+    total = sum(count for count, _ in counts)
+    running = 0
+    rows = []
+    for count, uop in counts:
+        running += count
+        rows.append(
+            (uop, count, format_ratio(count, total), format_ratio(running, total))
+        )
+    return rows
+
+
+def emit_optimization_stats(stats):
+    """Print the Tier 2 optimizer section; skipped if stats has no optimizer data."""
+    if "Optimization attempts" not in stats:
+        return
+
+    uop_stats = extract_opcode_stats(stats, "uops")
+    with Section(
+        "Optimization (Tier 2) stats", summary="statistics about the Tier 2 optimizer"
+    ):
+        with Section("Overall stats", level=3):
+            rows = calculate_optimization_stats(stats)
+            emit_table(("", "Count:", "Ratio:"), rows)
+
+        emit_histogram(
+            "Trace length histogram",
+            stats,
+            "Trace length",
+            stats["Optimization traces created"],
+        )
+        emit_histogram(
+            "Optimized trace length histogram",
+            stats,
+            "Optimized trace length",
+            stats["Optimization traces created"],
+        )
+        emit_histogram(
+            "Trace run length histogram",
+            stats,
+            "Trace run length",
+            stats["Optimization traces executed"],
+        )
+
+        with Section("Uop stats", level=3):
+            rows = calculate_uop_execution_counts(uop_stats)
+            emit_table(("Uop", "Count:", "Self:", "Cumulative:"), rows)
+
+        with Section("Unsupported opcodes", level=3):
+            unsupported_opcodes = extract_opcode_stats(stats, "unsupported_opcode")
+            data = []
+            for opcode, entry in unsupported_opcodes.items():
+                data.append((entry["count"], opcode))
+            data.sort(reverse=True)  # highest count first
+            rows = [(x[1], x[0]) for x in data]
+            emit_table(("Opcode", "Count"), rows)
+
+
+def emit_comparative_optimization_stats(base_stats, head_stats):
+    print("## Comparative optimization stats not implemented\n\n")  # stub: args unused
+
 
 def output_single_stats(stats):
-    opcode_stats = extract_opcode_stats(stats)
+    opcode_stats = extract_opcode_stats(stats, "opcode")
     total = get_total(opcode_stats)
     emit_execution_counts(opcode_stats, total)
     emit_pair_counts(opcode_stats, total)
     emit_specialization_stats(opcode_stats, stats["_defines"])
-    emit_specialization_overview(opcode_stats, total, stats["_specialized_instructions"])
+    emit_specialization_overview(
+        opcode_stats, total, stats["_specialized_instructions"]
+    )
     emit_call_stats(stats, stats["_stats_defines"])
     emit_object_stats(stats)
     emit_gc_stats(stats)
+    emit_optimization_stats(stats)
     with Section("Meta stats", summary="Meta statistics"):
-        emit_table(("", "Count:"), [('Number of data files', stats['__nfiles__'])])
+        emit_table(("", "Count:"), [("Number of data files", stats["__nfiles__"])])
 
 
 def output_comparative_stats(base_stats, head_stats):
-    base_opcode_stats = extract_opcode_stats(base_stats)
+    base_opcode_stats = extract_opcode_stats(base_stats, "opcode")
     base_total = get_total(base_opcode_stats)
 
-    head_opcode_stats = extract_opcode_stats(head_stats)
+    head_opcode_stats = extract_opcode_stats(head_stats, "opcode")
     head_total = get_total(head_opcode_stats)
 
     emit_comparative_execution_counts(
@@ -639,12 +878,17 @@ def output_comparative_stats(base_stats, head_stats):
         base_opcode_stats, head_opcode_stats, head_stats["_defines"]
     )
     emit_comparative_specialization_overview(
-        base_opcode_stats, base_total, head_opcode_stats, head_total,
-        head_stats["_specialized_instructions"]
+        base_opcode_stats,
+        base_total,
+        head_opcode_stats,
+        head_total,
+        head_stats["_specialized_instructions"],
     )
     emit_comparative_call_stats(base_stats, head_stats, head_stats["_stats_defines"])
     emit_comparative_object_stats(base_stats, head_stats)
     emit_comparative_gc_stats(base_stats, head_stats)
+    emit_comparative_optimization_stats(base_stats, head_stats)
+
 
 def output_stats(inputs, json_output=None):
     if len(inputs) == 1:
@@ -654,9 +898,7 @@ def output_stats(inputs, json_output=None):
         output_single_stats(stats)
     elif len(inputs) == 2:
         if json_output is not None:
-            raise ValueError(
-                "Can not output to JSON when there are multiple inputs"
-            )
+            raise ValueError("Can not output to JSON when there are multiple inputs")
 
         base_stats = gather_stats(inputs[0])
         head_stats = gather_stats(inputs[1])
@@ -665,6 +907,7 @@ def output_stats(inputs, json_output=None):
     print("---")
     print("Stats gathered on:", date.today())
 
+
 def main():
     parser = argparse.ArgumentParser(description="Summarize pystats results")
 
@@ -680,14 +923,14 @@ def main():
         If one source is provided, its stats are printed.
         If two sources are provided, comparative stats are printed.
         Default is {DEFAULT_DIR}.
-        """
+        """,
     )
 
     parser.add_argument(
         "--json-output",
         nargs="?",
         type=argparse.FileType("w"),
-        help="Output complete raw results to the given JSON file."
+        help="Output complete raw results to the given JSON file.",
     )
 
     args = parser.parse_args()
@@ -697,5 +940,6 @@ def main():
 
     output_stats(args.inputs, json_output=args.json_output)
 
+
 if __name__ == "__main__":
     main()
diff --git a/Tools/ssl/multissltests.py b/Tools/ssl/multissltests.py
index f066fb52cfd496..120e3883adc795 100755
--- a/Tools/ssl/multissltests.py
+++ b/Tools/ssl/multissltests.py
@@ -151,7 +151,10 @@ class AbstractBuilder(object):
     build_template = None
     depend_target = None
     install_target = 'install'
-    jobs = os.cpu_count()
+    if hasattr(os, 'process_cpu_count'):
+        jobs = os.process_cpu_count()
+    else:
+        jobs = os.cpu_count()
 
     module_files = (
         os.path.join(PYTHONROOT, "Modules/_ssl.c"),
diff --git a/Tools/wasm/wasm_build.py b/Tools/wasm/wasm_build.py
index 3558ecd869dfc5..c0b9999a5dad03 100755
--- a/Tools/wasm/wasm_build.py
+++ b/Tools/wasm/wasm_build.py
@@ -516,7 +516,11 @@ def make_cmd(self) -> List[str]:
     def getenv(self) -> Dict[str, Any]:
         """Generate environ dict for platform"""
         env = os.environ.copy()
-        env.setdefault("MAKEFLAGS", f"-j{os.cpu_count()}")
+        if hasattr(os, 'process_cpu_count'):
+            cpu_count = os.process_cpu_count()
+        else:
+            cpu_count = os.cpu_count()
+        env.setdefault("MAKEFLAGS", f"-j{cpu_count}")
         platenv = self.host.platform.getenv(self)
         for key, value in platenv.items():
             if value is None: