diff --git a/pandas/conftest.py b/pandas/conftest.py
index 39d4e25e4cbfd..78cdc2ac5a2bb 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -1248,7 +1248,6 @@ def nullable_string_dtype(request):
     params=[
         "python",
         pytest.param("pyarrow", marks=td.skip_if_no("pyarrow")),
-        pytest.param("pyarrow_numpy", marks=td.skip_if_no("pyarrow")),
     ]
 )
 def string_storage(request):
@@ -1257,7 +1256,6 @@ def string_storage(request):
 
     * 'python'
     * 'pyarrow'
-    * 'pyarrow_numpy'
     """
     return request.param
 
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index a6625d99eaa71..4cd7e50f0ec50 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -12,7 +12,10 @@
 from __future__ import annotations
 
 import os
-from typing import Callable
+from typing import (
+    Any,
+    Callable,
+)
 
 import pandas._config.config as cf
 from pandas._config.config import (
@@ -506,12 +509,27 @@ def use_inf_as_na_cb(key) -> None:
     ``future.infer_string`` is set to True.
 """
 
+
+def is_valid_string_storage(value: Any) -> None:
+    legal_values = ["python", "pyarrow"]
+    if value not in legal_values:
+        msg = "Value must be one of python|pyarrow"
+        if value == "pyarrow_numpy":
+            # TODO: we can remove extra message after 3.0
+            msg += (
+                ". 'pyarrow_numpy' was specified, but this option should be "
+                "enabled using pandas.options.future.infer_string instead"
+            )
+        raise ValueError(msg)
+
+
 with cf.config_prefix("mode"):
     cf.register_option(
         "string_storage",
         "python",
         string_storage_doc,
-        validator=is_one_of_factory(["python", "pyarrow", "pyarrow_numpy"]),
+        # validator=is_one_of_factory(["python", "pyarrow"]),
+        validator=is_valid_string_storage,
     )
 
 
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index b1c5f4338a4ed..6bf20b6fcd5f7 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -516,19 +516,12 @@ def test_arrow_array(dtype):
     assert arr.equals(expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
+@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 @pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
-def test_arrow_roundtrip(dtype, string_storage2, request, using_infer_string):
+def test_arrow_roundtrip(dtype, string_storage, using_infer_string):
     # roundtrip possible from arrow 1.0.0
     pa = pytest.importorskip("pyarrow")
 
-    if using_infer_string and string_storage2 != "pyarrow_numpy":
-        request.applymarker(
-            pytest.mark.xfail(
-                reason="infer_string takes precedence over string storage"
-            )
-        )
-
     data = pd.array(["a", "b", None], dtype=dtype)
     df = pd.DataFrame({"a": data})
     table = pa.table(df)
@@ -536,30 +529,21 @@ def test_arrow_roundtrip(dtype, string_storage2, request, using_infer_string):
         assert table.field("a").type == "string"
     else:
         assert table.field("a").type == "large_string"
-    with pd.option_context("string_storage", string_storage2):
+    with pd.option_context("string_storage", string_storage):
         result = table.to_pandas()
     assert isinstance(result["a"].dtype, pd.StringDtype)
-    expected = df.astype(f"string[{string_storage2}]")
+    expected = df.astype(f"string[{string_storage}]")
     tm.assert_frame_equal(result, expected)
     # ensure the missing value is represented by NA and not np.nan or None
     assert result.loc[2, "a"] is result["a"].dtype.na_value
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
+@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 @pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
-def test_arrow_load_from_zero_chunks(
-    dtype, string_storage2, request, using_infer_string
-):
+def test_arrow_load_from_zero_chunks(dtype, string_storage, using_infer_string):
     # GH-41040
     pa = pytest.importorskip("pyarrow")
 
-    if using_infer_string and string_storage2 != "pyarrow_numpy":
-        request.applymarker(
-            pytest.mark.xfail(
-                reason="infer_string takes precedence over string storage"
-            )
-        )
-
     data = pd.array([], dtype=dtype)
     df = pd.DataFrame({"a": data})
     table = pa.table(df)
@@ -569,10 +553,10 @@ def test_arrow_load_from_zero_chunks(
         assert table.field("a").type == "large_string"
     # Instantiate the same table with no chunks at all
     table = pa.table([pa.chunked_array([], type=pa.string())], schema=table.schema)
-    with pd.option_context("string_storage", string_storage2):
+    with pd.option_context("string_storage", string_storage):
         result = table.to_pandas()
     assert isinstance(result["a"].dtype, pd.StringDtype)
-    expected = df.astype(f"string[{string_storage2}]")
+    expected = df.astype(f"string[{string_storage}]")
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py
index dceb93364d505..d38b728aaf120 100644
--- a/pandas/tests/arrays/string_/test_string_arrow.py
+++ b/pandas/tests/arrays/string_/test_string_arrow.py
@@ -27,16 +27,18 @@ def test_eq_all_na():
 
 
 def test_config(string_storage, request, using_infer_string):
-    if using_infer_string and string_storage != "pyarrow_numpy":
-        request.applymarker(pytest.mark.xfail(reason="infer string takes precedence"))
-    if string_storage == "pyarrow_numpy":
+    if using_infer_string and string_storage == "python":
+        # python string storage with na_value=NaN is not yet implemented
         request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
+
     with pd.option_context("string_storage", string_storage):
         assert StringDtype().storage == string_storage
         result = pd.array(["a", "b"])
         assert result.dtype.storage == string_storage
 
-    dtype = StringDtype(string_storage)
+    dtype = StringDtype(
+        string_storage, na_value=np.nan if using_infer_string else pd.NA
+    )
     expected = dtype.construct_array_type()._from_sequence(["a", "b"], dtype=dtype)
     tm.assert_equal(result, expected)
 
diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py
index b6510f384fabe..ea9cc22d93758 100644
--- a/pandas/tests/frame/methods/test_astype.py
+++ b/pandas/tests/frame/methods/test_astype.py
@@ -912,3 +912,12 @@ def test_astype_to_string_not_modifying_input(string_storage, val):
     with option_context("mode.string_storage", string_storage):
         df.astype("string", copy=False)
     tm.assert_frame_equal(df, expected)
+
+
+@pytest.mark.parametrize("val", [None, 1, 1.5, np.nan, NaT])
+def test_astype_to_string_dtype_not_modifying_input(any_string_dtype, val):
+    # GH#51073 - variant of the above test with explicit dtype instances
+    df = DataFrame({"a": ["a", "b", val]})
+    expected = df.copy()
+    df.astype(any_string_dtype)
+    tm.assert_frame_equal(df, expected)
diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py
index 91fa81b5bee2e..59779234b46d9 100644
--- a/pandas/tests/frame/methods/test_convert_dtypes.py
+++ b/pandas/tests/frame/methods/test_convert_dtypes.py
@@ -10,6 +10,8 @@
 
 
 class TestConvertDtypes:
+    # TODO convert_dtypes should not use NaN variant of string dtype, but always NA
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     @pytest.mark.parametrize(
         "convert_integer, expected", [(False, np.dtype("int32")), (True, "Int32")]
     )
@@ -18,9 +20,6 @@ def test_convert_dtypes(
     ):
         # Specific types are tested in tests/series/test_dtypes.py
         # Just check that it works for DataFrame here
-        if using_infer_string:
-            string_storage = "pyarrow_numpy"
-
         df = pd.DataFrame(
             {
                 "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py
index ab6cacc4cc860..bdefadf3dbec0 100644
--- a/pandas/tests/io/conftest.py
+++ b/pandas/tests/io/conftest.py
@@ -224,19 +224,3 @@ def compression_format(request):
 @pytest.fixture(params=_compression_formats_params)
 def compression_ext(request):
     return request.param[0]
-
-
-@pytest.fixture(
-    params=[
-        "python",
-        pytest.param("pyarrow", marks=td.skip_if_no("pyarrow")),
-    ]
-)
-def string_storage(request):
-    """
-    Parametrized fixture for pd.options.mode.string_storage.
-
-    * 'python'
-    * 'pyarrow'
-    """
-    return request.param