From 7d7d4fb04a6bd053c91cc8cdc469ca734000e60d Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Tue, 22 Aug 2023 13:09:28 -0400
Subject: [PATCH 01/36] build-gnu.sh: `/usr/bin/timeout` should not be
 hardcoded to /usr/bin location Fixes #5193

---
 util/build-gnu.sh | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/util/build-gnu.sh b/util/build-gnu.sh
index d852ed66fb..d02c8c842e 100755
--- a/util/build-gnu.sh
+++ b/util/build-gnu.sh
@@ -82,7 +82,16 @@ else
     ./bootstrap --skip-po
     ./configure --quiet --disable-gcc-warnings
     #Add timeout to to protect against hangs
-    sed -i 's|^"\$@|/usr/bin/timeout 600 "\$@|' build-aux/test-driver
+    # On MacOS there is no system /usr/bin/timeout 
+    # and trying to add it to /usr/bin (with symlink of copy binary) will fail unless system integrity protection is disabled (not ideal)
+    # ref: https://support.apple.com/en-us/102149
+    # On MacOS the Homebrew coreutils could be installed and then "sudo ln -s /opt/homebrew/bin/timeout /usr/local/bin/timeout"
+    # Set to /usr/local/timeout instead if /usr/bin/timeout is not found
+    if [ -x /usr/bin/timeout ] ; then
+        sed -i 's|^"\$@|/usr/bin/timeout 600 "\$@|' build-aux/test-driver
+    else 
+        sed -i 's|^"\$@|/usr/local/bin/timeout 600 "\$@|' build-aux/test-driver
+    fi
     # Change the PATH in the Makefile to test the uutils coreutils instead of the GNU coreutils
     sed -i "s/^[[:blank:]]*PATH=.*/  PATH='${UU_BUILD_DIR//\//\\/}\$(PATH_SEPARATOR)'\"\$\$PATH\" \\\/" Makefile
     sed -i 's| tr | /usr/bin/tr |' tests/init.sh
@@ -153,13 +162,28 @@ sed -i 's|touch |/usr/bin/touch |' tests/cp/reflink-perm.sh tests/ls/block-size.
 sed -i 's|ln -|/usr/bin/ln -|' tests/cp/link-deref.sh
 sed -i 's|cp |/usr/bin/cp |' tests/mv/hard-2.sh
 sed -i 's|paste |/usr/bin/paste |' tests/misc/od-endian.sh
-sed -i 's|timeout |/usr/bin/timeout |' tests/tail-2/follow-stdin.sh
+# On MacOS there is no system /usr/bin/timeout 
+# and trying to add it to /usr/bin (with symlink of copy binary) will fail unless system integrity protection is disabled (not ideal)
+# ref: https://support.apple.com/en-us/102149
+# On MacOS the Homebrew coreutils could be installed and then "sudo ln -s /opt/homebrew/bin/timeout /usr/local/bin/timeout"
+# Set to /usr/local/timeout instead if /usr/bin/timeout is not found
+if [ -x /usr/bin/timeout ] ; then
+        sed -i 's|timeout |/usr/bin/timeout |' tests/tail-2/follow-stdin.sh
+else 
+        sed -i 's|timeout |/usr/local/bin/timeout |' tests/tail-2/follow-stdin.sh
+fi
+
 
 # Add specific timeout to tests that currently hang to limit time spent waiting
-sed -i 's|\(^\s*\)seq \$|\1/usr/bin/timeout 0.1 seq \$|' tests/misc/seq-precision.sh tests/misc/seq-long-double.sh
+if [ -x /usr/bin/timeout ] ; then
+    sed -i 's|\(^\s*\)seq \$|\1/usr/bin/timeout 0.1 seq \$|' tests/misc/seq-precision.sh tests/misc/seq-long-double.sh
+else
+    sed -i 's|\(^\s*\)seq \$|\1/usr/local/bin/timeout 0.1 seq \$|' tests/misc/seq-precision.sh tests/misc/seq-long-double.sh
+fi
 
-# Remove dup of /usr/bin/ when executed several times
+# Remove dup of /usr/bin/ (and /usr/local/bin) when executed several times
 grep -rlE '/usr/bin/\s?/usr/bin' init.cfg tests/* | xargs --no-run-if-empty sed -Ei 's|/usr/bin/\s?/usr/bin/|/usr/bin/|g'
+grep -rlE '/usr/local/bin/\s?/usr/local/bin' init.cfg tests/* | xargs --no-run-if-empty sed -Ei 's|/usr/local/bin/\s?/usr/local/bin/|/usr/local/bin/|g'
 
 #### Adjust tests to make them work with Rust/coreutils
 # in some cases, what we are doing in rust/coreutils is good (or better)

From 75cc6e3fdccedf551e297e54715cc65ed6045c7c Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Tue, 22 Aug 2023 13:22:24 -0400
Subject: [PATCH 02/36] added some TODO(s) for missing/moved locations

---
 util/build-gnu.sh | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/util/build-gnu.sh b/util/build-gnu.sh
index d02c8c842e..6c608a0607 100755
--- a/util/build-gnu.sh
+++ b/util/build-gnu.sh
@@ -151,6 +151,9 @@ sed -i -e '/tests\/misc\/seq-precision.sh/ D' \
 sed -i '/INT_OFLOW/ D' tests/misc/printf.sh
 
 # Use the system coreutils where the test fails due to error in a util that is not the one being tested
+# TODO : tests/tail-2/ does not appear to exist 
+# and have been moved to just tests/tail/ location
+# Might need to update the section bvelow to reflect that
 sed -i 's|stat|/usr/bin/stat|' tests/touch/60-seconds.sh tests/misc/sort-compress-proc.sh
 sed -i 's|ls -|/usr/bin/ls -|' tests/cp/same-file.sh tests/misc/mknod.sh tests/mv/part-symlink.sh
 sed -i 's|chmod |/usr/bin/chmod |' tests/du/inacc-dir.sh tests/tail-2/tail-n0f.sh tests/cp/fail-perm.sh tests/mv/i-2.sh tests/misc/shuf.sh
@@ -175,6 +178,9 @@ fi
 
 
 # Add specific timeout to tests that currently hang to limit time spent waiting
+# TODO : tests/misc/seq-precision.sh tests/misc/seq-long-double.sh do not appear to exist 
+# and have been moved to tests/seq/ location
+# Might need to update the section bvelow to reflect that
 if [ -x /usr/bin/timeout ] ; then
     sed -i 's|\(^\s*\)seq \$|\1/usr/bin/timeout 0.1 seq \$|' tests/misc/seq-precision.sh tests/misc/seq-long-double.sh
 else
@@ -205,6 +211,9 @@ sed -i -e "s|rm: cannot remove 'rel': Permission denied|rm: cannot remove 'rel':
 
 # overlay-headers.sh test intends to check for inotify events,
 # however there's a bug because `---dis` is an alias for: `---disable-inotify`
+# TODO : tests/tail-2/ does not appear to exist 
+# and have been moved to just tests/tail/ location
+# Might need to update the section bvelow to reflect that
 sed -i -e "s|---dis ||g" tests/tail-2/overlay-headers.sh
 
 test -f "${UU_BUILD_DIR}/getlimits" || cp src/getlimits "${UU_BUILD_DIR}"

From c44c3cd71649252ae333dca0c55ab56d9ca52163 Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Tue, 22 Aug 2023 15:28:02 -0400
Subject: [PATCH 03/36] fixed spelling

---
 util/build-gnu.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/util/build-gnu.sh b/util/build-gnu.sh
index 6c608a0607..4414b7f12b 100755
--- a/util/build-gnu.sh
+++ b/util/build-gnu.sh
@@ -153,7 +153,7 @@ sed -i '/INT_OFLOW/ D' tests/misc/printf.sh
 # Use the system coreutils where the test fails due to error in a util that is not the one being tested
 # TODO : tests/tail-2/ does not appear to exist 
 # and have been moved to just tests/tail/ location
-# Might need to update the section bvelow to reflect that
+# Might need to update the section below to reflect that
 sed -i 's|stat|/usr/bin/stat|' tests/touch/60-seconds.sh tests/misc/sort-compress-proc.sh
 sed -i 's|ls -|/usr/bin/ls -|' tests/cp/same-file.sh tests/misc/mknod.sh tests/mv/part-symlink.sh
 sed -i 's|chmod |/usr/bin/chmod |' tests/du/inacc-dir.sh tests/tail-2/tail-n0f.sh tests/cp/fail-perm.sh tests/mv/i-2.sh tests/misc/shuf.sh
@@ -180,7 +180,7 @@ fi
 # Add specific timeout to tests that currently hang to limit time spent waiting
 # TODO : tests/misc/seq-precision.sh tests/misc/seq-long-double.sh do not appear to exist 
 # and have been moved to tests/seq/ location
-# Might need to update the section bvelow to reflect that
+# Might need to update the section below to reflect that
 if [ -x /usr/bin/timeout ] ; then
     sed -i 's|\(^\s*\)seq \$|\1/usr/bin/timeout 0.1 seq \$|' tests/misc/seq-precision.sh tests/misc/seq-long-double.sh
 else
@@ -213,7 +213,7 @@ sed -i -e "s|rm: cannot remove 'rel': Permission denied|rm: cannot remove 'rel':
 # however there's a bug because `---dis` is an alias for: `---disable-inotify`
 # TODO : tests/tail-2/ does not appear to exist 
 # and have been moved to just tests/tail/ location
-# Might need to update the section bvelow to reflect that
+# Might need to update the section below to reflect that
 sed -i -e "s|---dis ||g" tests/tail-2/overlay-headers.sh
 
 test -f "${UU_BUILD_DIR}/getlimits" || cp src/getlimits "${UU_BUILD_DIR}"

From 6468845850a0c847f42fb400ffecbdcf54f8e99c Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Thu, 24 Aug 2023 20:36:52 -0400
Subject: [PATCH 04/36] refactor to check for system timeout once + commented
 out many moved/deleted test files that make script fail

---
 util/build-gnu.sh | 71 ++++++++++++++++++++++-------------------------
 1 file changed, 33 insertions(+), 38 deletions(-)

diff --git a/util/build-gnu.sh b/util/build-gnu.sh
index 4414b7f12b..c6991d15c1 100755
--- a/util/build-gnu.sh
+++ b/util/build-gnu.sh
@@ -18,6 +18,20 @@ path_GNU="$(readlink -fm -- "${path_GNU:-${path_UUTILS}/../gnu}")"
 
 ###
 
+# On MacOS there is no system /usr/bin/timeout 
+# and trying to add it to /usr/bin (with symlink of copy binary) will fail unless system integrity protection is disabled (not ideal)
+# ref: https://support.apple.com/en-us/102149
+# On MacOS the Homebrew coreutils could be installed and then "sudo ln -s /opt/homebrew/bin/timeout /usr/local/bin/timeout"
+# Set to /usr/local/bin/timeout instead if /usr/bin/timeout is not found
+SYSTEM_TIMEOUT="timeout"
+if [ -x /usr/bin/timeout ] ; then
+    SYSTEM_TIMEOUT="/usr/bin/timeout"
+elif [ -x /usr/local/bin/timeout ] ; then
+    SYSTEM_TIMEOUT="/usr/local/bin/timeout"  
+fi
+
+###
+
 if test ! -d "${path_GNU}"; then
     echo "Could not find GNU coreutils (expected at '${path_GNU}')"
     echo "Run the following to download into the expected path:"
@@ -82,16 +96,7 @@ else
     ./bootstrap --skip-po
     ./configure --quiet --disable-gcc-warnings
     #Add timeout to to protect against hangs
-    # On MacOS there is no system /usr/bin/timeout 
-    # and trying to add it to /usr/bin (with symlink of copy binary) will fail unless system integrity protection is disabled (not ideal)
-    # ref: https://support.apple.com/en-us/102149
-    # On MacOS the Homebrew coreutils could be installed and then "sudo ln -s /opt/homebrew/bin/timeout /usr/local/bin/timeout"
-    # Set to /usr/local/timeout instead if /usr/bin/timeout is not found
-    if [ -x /usr/bin/timeout ] ; then
-        sed -i 's|^"\$@|/usr/bin/timeout 600 "\$@|' build-aux/test-driver
-    else 
-        sed -i 's|^"\$@|/usr/local/bin/timeout 600 "\$@|' build-aux/test-driver
-    fi
+    sed -i 's|^"\$@|'"${SYSTEM_TIMEOUT}"' 600 "\$@|' build-aux/test-driver
     # Change the PATH in the Makefile to test the uutils coreutils instead of the GNU coreutils
     sed -i "s/^[[:blank:]]*PATH=.*/  PATH='${UU_BUILD_DIR//\//\\/}\$(PATH_SEPARATOR)'\"\$\$PATH\" \\\/" Makefile
     sed -i 's| tr | /usr/bin/tr |' tests/init.sh
@@ -148,44 +153,32 @@ sed -i -e '/tests\/misc\/seq-precision.sh/ D' \
     Makefile
 
 # printf doesn't limit the values used in its arg, so this produced ~2GB of output
-sed -i '/INT_OFLOW/ D' tests/misc/printf.sh
+# Looks like tests/misc/printf.sh does not exist anymore - comment it out for now
+#sed -i '/INT_OFLOW/ D' tests/misc/printf.sh
 
 # Use the system coreutils where the test fails due to error in a util that is not the one being tested
 # TODO : tests/tail-2/ does not appear to exist 
 # and have been moved to just tests/tail/ location
 # Might need to update the section below to reflect that
-sed -i 's|stat|/usr/bin/stat|' tests/touch/60-seconds.sh tests/misc/sort-compress-proc.sh
+# Also looks like tests/misc/sort-compress-proc.sh and tests/tail-2/tail-n0f.sh and tests/misc/shuf.sh and many others do not exist anymore or moved - comment it out for now
+sed -i 's|stat|/usr/bin/stat|' tests/touch/60-seconds.sh #tests/misc/sort-compress-proc.sh
 sed -i 's|ls -|/usr/bin/ls -|' tests/cp/same-file.sh tests/misc/mknod.sh tests/mv/part-symlink.sh
-sed -i 's|chmod |/usr/bin/chmod |' tests/du/inacc-dir.sh tests/tail-2/tail-n0f.sh tests/cp/fail-perm.sh tests/mv/i-2.sh tests/misc/shuf.sh
-sed -i 's|sort |/usr/bin/sort |' tests/ls/hyperlink.sh tests/misc/test-N.sh
-sed -i 's|split |/usr/bin/split |' tests/misc/factor-parallel.sh
-sed -i 's|id -|/usr/bin/id -|' tests/misc/runcon-no-reorder.sh
+sed -i 's|chmod |/usr/bin/chmod |' tests/du/inacc-dir.sh  tests/cp/fail-perm.sh tests/mv/i-2.sh #tests/misc/shuf.sh #tests/tail-2/tail-n0f.sh
+sed -i 's|sort |/usr/bin/sort |' tests/ls/hyperlink.sh #tests/misc/test-N.sh
+#sed -i 's|split |/usr/bin/split |' tests/misc/factor-parallel.sh
+#sed -i 's|id -|/usr/bin/id -|' tests/misc/runcon-no-reorder.sh
 # tests/ls/abmon-align.sh - https://github.com/uutils/coreutils/issues/3505
-sed -i 's|touch |/usr/bin/touch |' tests/cp/reflink-perm.sh tests/ls/block-size.sh tests/mv/update.sh tests/misc/ls-time.sh tests/misc/stat-nanoseconds.sh tests/misc/time-style.sh tests/misc/test-N.sh tests/ls/abmon-align.sh
+sed -i 's|touch |/usr/bin/touch |' tests/cp/reflink-perm.sh tests/ls/block-size.sh tests/mv/update.sh  tests/misc/time-style.sh  tests/ls/abmon-align.sh #tests/misc/ls-time.sh tests/misc/stat-nanoseconds.sh tests/misc/test-N.sh
 sed -i 's|ln -|/usr/bin/ln -|' tests/cp/link-deref.sh
 sed -i 's|cp |/usr/bin/cp |' tests/mv/hard-2.sh
-sed -i 's|paste |/usr/bin/paste |' tests/misc/od-endian.sh
-# On MacOS there is no system /usr/bin/timeout 
-# and trying to add it to /usr/bin (with symlink of copy binary) will fail unless system integrity protection is disabled (not ideal)
-# ref: https://support.apple.com/en-us/102149
-# On MacOS the Homebrew coreutils could be installed and then "sudo ln -s /opt/homebrew/bin/timeout /usr/local/bin/timeout"
-# Set to /usr/local/timeout instead if /usr/bin/timeout is not found
-if [ -x /usr/bin/timeout ] ; then
-        sed -i 's|timeout |/usr/bin/timeout |' tests/tail-2/follow-stdin.sh
-else 
-        sed -i 's|timeout |/usr/local/bin/timeout |' tests/tail-2/follow-stdin.sh
-fi
-
+#sed -i 's|paste |/usr/bin/paste |' tests/misc/od-endian.sh
+#sed -i 's|timeout |'"${SYSTEM_TIMEOUT}"' |' tests/tail-2/follow-stdin.sh
 
 # Add specific timeout to tests that currently hang to limit time spent waiting
 # TODO : tests/misc/seq-precision.sh tests/misc/seq-long-double.sh do not appear to exist 
 # and have been moved to tests/seq/ location
-# Might need to update the section below to reflect that
-if [ -x /usr/bin/timeout ] ; then
-    sed -i 's|\(^\s*\)seq \$|\1/usr/bin/timeout 0.1 seq \$|' tests/misc/seq-precision.sh tests/misc/seq-long-double.sh
-else
-    sed -i 's|\(^\s*\)seq \$|\1/usr/local/bin/timeout 0.1 seq \$|' tests/misc/seq-precision.sh tests/misc/seq-long-double.sh
-fi
+# Might need to update the section below to reflect that, but comment it out for now
+#sed -i 's|\(^\s*\)seq \$|\1'"${SYSTEM_TIMEOUT}"' 0.1 seq \$|' tests/misc/seq-precision.sh tests/misc/seq-long-double.sh
 
 # Remove dup of /usr/bin/ (and /usr/local/bin) when executed several times
 grep -rlE '/usr/bin/\s?/usr/bin' init.cfg tests/* | xargs --no-run-if-empty sed -Ei 's|/usr/bin/\s?/usr/bin/|/usr/bin/|g'
@@ -214,7 +207,7 @@ sed -i -e "s|rm: cannot remove 'rel': Permission denied|rm: cannot remove 'rel':
 # TODO : tests/tail-2/ does not appear to exist 
 # and have been moved to just tests/tail/ location
 # Might need to update the section below to reflect that
-sed -i -e "s|---dis ||g" tests/tail-2/overlay-headers.sh
+#sed -i -e "s|---dis ||g" tests/tail-2/overlay-headers.sh
 
 test -f "${UU_BUILD_DIR}/getlimits" || cp src/getlimits "${UU_BUILD_DIR}"
 
@@ -271,11 +264,13 @@ sed -i -e "s/Try 'mv --help' for more information/For more information, try '--h
 
 # GNU doesn't support width > INT_MAX
 # disable these test cases
-sed -i -E "s|^([^#]*2_31.*)$|#\1|g" tests/misc/printf-cov.pl
+# TODO: moved or deleted tests/misc/printf-cov.pl
+#sed -i -E "s|^([^#]*2_31.*)$|#\1|g" tests/misc/printf-cov.pl
 
 sed -i -e "s/du: invalid -t argument/du: invalid --threshold argument/" -e "s/du: option requires an argument/error: a value is required for '--threshold <SIZE>' but none was supplied/" -e "/Try 'du --help' for more information./d" tests/du/threshold.sh
 
 # disable two kind of tests:
 # "hostid BEFORE --help" doesn't fail for GNU. we fail. we are probably doing better
 # "hostid BEFORE --help AFTER " same for this
-sed -i -e "s/env \$prog \$BEFORE \$opt > out2/env \$prog \$BEFORE \$opt > out2 #/" -e "s/env \$prog \$BEFORE \$opt AFTER > out3/env \$prog \$BEFORE \$opt AFTER > out3 #/" -e "s/compare exp out2/compare exp out2 #/" -e "s/compare exp out3/compare exp out3 #/" tests/misc/help-version-getopt.sh
+# TODO moved or deleted tests/misc/help-version-getopt.sh
+#sed -i -e "s/env \$prog \$BEFORE \$opt > out2/env \$prog \$BEFORE \$opt > out2 #/" -e "s/env \$prog \$BEFORE \$opt AFTER > out3/env \$prog \$BEFORE \$opt AFTER > out3 #/" -e "s/compare exp out2/compare exp out2 #/" -e "s/compare exp out3/compare exp out3 #/" tests/misc/help-version-getopt.sh

From 350b9c3d486449f592b9fd92f349b884926073cb Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Fri, 25 Aug 2023 13:43:48 -0400
Subject: [PATCH 05/36] reverted commented out test files and added more
 details in TODO(s)

---
 util/build-gnu.sh | 51 +++++++++++++++++++++--------------------------
 1 file changed, 23 insertions(+), 28 deletions(-)

diff --git a/util/build-gnu.sh b/util/build-gnu.sh
index c6991d15c1..587cf85809 100755
--- a/util/build-gnu.sh
+++ b/util/build-gnu.sh
@@ -152,37 +152,37 @@ sed -i -e '/tests\/misc\/invalid-opt.pl/ D' \
 sed -i -e '/tests\/misc\/seq-precision.sh/ D' \
     Makefile
 
+# TODO: many files and some directories modified with 'sed' in the sections below either no longer exist or have been moved
+# TODO: Might need to review and updated sections below
+# TODO: As a result this script will fail when executed normally as 'bash util/build-gnu.hs' due to the 'set -e' set at the beginning 
+# TODO: The rest of the 'sed' commands after first failure in this scenario will not be executed as bash will exit on first error
+# TODO: However, the behaviour might be different when running it via GitHub actions (GnuTests)
+# TODO: For now, when running in local a workaround would be to comment out the 'set -e' at the beginning of the file
+
 # printf doesn't limit the values used in its arg, so this produced ~2GB of output
-# Looks like tests/misc/printf.sh does not exist anymore - comment it out for now
-#sed -i '/INT_OFLOW/ D' tests/misc/printf.sh
+# TODO: this is the first one to likely fail as tests/misc/printf.sh does not exist/have been moved
+sed -i '/INT_OFLOW/ D' tests/misc/printf.sh
+# TODO: all commands below might not be executed
 
 # Use the system coreutils where the test fails due to error in a util that is not the one being tested
-# TODO : tests/tail-2/ does not appear to exist 
-# and have been moved to just tests/tail/ location
-# Might need to update the section below to reflect that
-# Also looks like tests/misc/sort-compress-proc.sh and tests/tail-2/tail-n0f.sh and tests/misc/shuf.sh and many others do not exist anymore or moved - comment it out for now
-sed -i 's|stat|/usr/bin/stat|' tests/touch/60-seconds.sh #tests/misc/sort-compress-proc.sh
+sed -i 's|stat|/usr/bin/stat|' tests/touch/60-seconds.sh tests/misc/sort-compress-proc.sh
 sed -i 's|ls -|/usr/bin/ls -|' tests/cp/same-file.sh tests/misc/mknod.sh tests/mv/part-symlink.sh
-sed -i 's|chmod |/usr/bin/chmod |' tests/du/inacc-dir.sh  tests/cp/fail-perm.sh tests/mv/i-2.sh #tests/misc/shuf.sh #tests/tail-2/tail-n0f.sh
-sed -i 's|sort |/usr/bin/sort |' tests/ls/hyperlink.sh #tests/misc/test-N.sh
-#sed -i 's|split |/usr/bin/split |' tests/misc/factor-parallel.sh
-#sed -i 's|id -|/usr/bin/id -|' tests/misc/runcon-no-reorder.sh
+sed -i 's|chmod |/usr/bin/chmod |' tests/du/inacc-dir.sh tests/tail-2/tail-n0f.sh tests/cp/fail-perm.sh tests/mv/i-2.sh tests/misc/shuf.sh
+sed -i 's|sort |/usr/bin/sort |' tests/ls/hyperlink.sh tests/misc/test-N.sh
+sed -i 's|split |/usr/bin/split |' tests/misc/factor-parallel.sh
+sed -i 's|id -|/usr/bin/id -|' tests/misc/runcon-no-reorder.sh
 # tests/ls/abmon-align.sh - https://github.com/uutils/coreutils/issues/3505
-sed -i 's|touch |/usr/bin/touch |' tests/cp/reflink-perm.sh tests/ls/block-size.sh tests/mv/update.sh  tests/misc/time-style.sh  tests/ls/abmon-align.sh #tests/misc/ls-time.sh tests/misc/stat-nanoseconds.sh tests/misc/test-N.sh
+sed -i 's|touch |/usr/bin/touch |' tests/cp/reflink-perm.sh tests/ls/block-size.sh tests/mv/update.sh tests/misc/ls-time.sh tests/misc/stat-nanoseconds.sh tests/misc/time-style.sh tests/misc/test-N.sh tests/ls/abmon-align.sh
 sed -i 's|ln -|/usr/bin/ln -|' tests/cp/link-deref.sh
 sed -i 's|cp |/usr/bin/cp |' tests/mv/hard-2.sh
-#sed -i 's|paste |/usr/bin/paste |' tests/misc/od-endian.sh
-#sed -i 's|timeout |'"${SYSTEM_TIMEOUT}"' |' tests/tail-2/follow-stdin.sh
+sed -i 's|paste |/usr/bin/paste |' tests/misc/od-endian.sh
+sed -i 's|timeout |/usr/bin/timeout |' tests/tail-2/follow-stdin.sh
 
 # Add specific timeout to tests that currently hang to limit time spent waiting
-# TODO : tests/misc/seq-precision.sh tests/misc/seq-long-double.sh do not appear to exist 
-# and have been moved to tests/seq/ location
-# Might need to update the section below to reflect that, but comment it out for now
-#sed -i 's|\(^\s*\)seq \$|\1'"${SYSTEM_TIMEOUT}"' 0.1 seq \$|' tests/misc/seq-precision.sh tests/misc/seq-long-double.sh
+sed -i 's|\(^\s*\)seq \$|\1/usr/bin/timeout 0.1 seq \$|' tests/misc/seq-precision.sh tests/misc/seq-long-double.sh
 
-# Remove dup of /usr/bin/ (and /usr/local/bin) when executed several times
+# Remove dup of /usr/bin/ when executed several times
 grep -rlE '/usr/bin/\s?/usr/bin' init.cfg tests/* | xargs --no-run-if-empty sed -Ei 's|/usr/bin/\s?/usr/bin/|/usr/bin/|g'
-grep -rlE '/usr/local/bin/\s?/usr/local/bin' init.cfg tests/* | xargs --no-run-if-empty sed -Ei 's|/usr/local/bin/\s?/usr/local/bin/|/usr/local/bin/|g'
 
 #### Adjust tests to make them work with Rust/coreutils
 # in some cases, what we are doing in rust/coreutils is good (or better)
@@ -204,10 +204,7 @@ sed -i -e "s|rm: cannot remove 'rel': Permission denied|rm: cannot remove 'rel':
 
 # overlay-headers.sh test intends to check for inotify events,
 # however there's a bug because `---dis` is an alias for: `---disable-inotify`
-# TODO : tests/tail-2/ does not appear to exist 
-# and have been moved to just tests/tail/ location
-# Might need to update the section below to reflect that
-#sed -i -e "s|---dis ||g" tests/tail-2/overlay-headers.sh
+sed -i -e "s|---dis ||g" tests/tail-2/overlay-headers.sh
 
 test -f "${UU_BUILD_DIR}/getlimits" || cp src/getlimits "${UU_BUILD_DIR}"
 
@@ -264,13 +261,11 @@ sed -i -e "s/Try 'mv --help' for more information/For more information, try '--h
 
 # GNU doesn't support width > INT_MAX
 # disable these test cases
-# TODO: moved or deleted tests/misc/printf-cov.pl
-#sed -i -E "s|^([^#]*2_31.*)$|#\1|g" tests/misc/printf-cov.pl
+sed -i -E "s|^([^#]*2_31.*)$|#\1|g" tests/misc/printf-cov.pl
 
 sed -i -e "s/du: invalid -t argument/du: invalid --threshold argument/" -e "s/du: option requires an argument/error: a value is required for '--threshold <SIZE>' but none was supplied/" -e "/Try 'du --help' for more information./d" tests/du/threshold.sh
 
 # disable two kind of tests:
 # "hostid BEFORE --help" doesn't fail for GNU. we fail. we are probably doing better
 # "hostid BEFORE --help AFTER " same for this
-# TODO moved or deleted tests/misc/help-version-getopt.sh
-#sed -i -e "s/env \$prog \$BEFORE \$opt > out2/env \$prog \$BEFORE \$opt > out2 #/" -e "s/env \$prog \$BEFORE \$opt AFTER > out3/env \$prog \$BEFORE \$opt AFTER > out3 #/" -e "s/compare exp out2/compare exp out2 #/" -e "s/compare exp out3/compare exp out3 #/" tests/misc/help-version-getopt.sh
+sed -i -e "s/env \$prog \$BEFORE \$opt > out2/env \$prog \$BEFORE \$opt > out2 #/" -e "s/env \$prog \$BEFORE \$opt AFTER > out3/env \$prog \$BEFORE \$opt AFTER > out3 #/" -e "s/compare exp out2/compare exp out2 #/" -e "s/compare exp out3/compare exp out3 #/" tests/misc/help-version-getopt.sh

From 84d96f9d028a4dd8c0f98ba6db2ba6dc90c79dcb Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Sat, 26 Aug 2023 11:11:46 -0400
Subject: [PATCH 06/36] split: refactor for more common use case

---
 src/uu/split/src/split.rs   |  81 ++++++++++++++++++-
 tests/by-util/test_split.rs | 152 +++++++++++++++++++++++++++++++++++-
 2 files changed, 229 insertions(+), 4 deletions(-)

diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs
index 71aacfed00..c4b7e6d660 100644
--- a/src/uu/split/src/split.rs
+++ b/src/uu/split/src/split.rs
@@ -51,14 +51,66 @@ const AFTER_HELP: &str = help_section!("after help", "split.md");
 
 #[uucore::main]
 pub fn uumain(args: impl uucore::Args) -> UResult<()> {
+    let args = args.collect_lossy();
+
+    let (args, obs_lines) = handle_obsolete(&args[..]);
+
     let matches = uu_app().try_get_matches_from(args)?;
-    match Settings::from(&matches) {
+
+    match Settings::from(&matches, &obs_lines) {
         Ok(settings) => split(&settings),
         Err(e) if e.requires_usage() => Err(UUsageError::new(1, format!("{e}"))),
         Err(e) => Err(USimpleError::new(1, format!("{e}"))),
     }
 }
 
+/// Extract obsolete shorthand (if any) for specifying lines in following scenarios (and similar)
+/// `split -22 file` would mean `split -l 22 file`
+/// `split -2de file` would mean `split -l 2 -d -e file`
+/// `split -x300e file` would mean `split -x -l 300 -e file`
+/// `split -x300e -22 file` would mean `split -x -e -l 22 file` (last obsolete lines option wins)
+/// following GNU `split` behavior
+fn handle_obsolete(args: &[String]) -> (Vec<String>, Option<String>) {
+    let mut v: Vec<String> = vec![];
+    let mut obs_lines = None;
+    for arg in args.iter() {
+        let slice = &arg;
+        if slice.starts_with('-') && !slice.starts_with("--") {
+            // start of the short option string
+            // extract numeric part and filter it out
+            let mut obs_lines_extracted: Vec<char> = vec![];
+            let filtered_slice: Vec<char> = slice
+                .chars()
+                .filter(|c| {
+                    if c.is_ascii_digit() {
+                        obs_lines_extracted.push(*c);
+                        false
+                    } else {
+                        true
+                    }
+                })
+                .collect();
+
+            if filtered_slice.get(1).is_some() {
+                // there were some short options in front of obsolete lines number
+                // i.e. '-xd100' or similar
+                // preserve it
+                v.push(filtered_slice.iter().collect())
+            }
+            if !obs_lines_extracted.is_empty() {
+                // obsolete lines value was extracted
+                obs_lines = Some(obs_lines_extracted.iter().collect());
+            }
+        } else {
+            // not a short option
+            // preserve it
+            v.push(arg.to_owned());
+        }
+    }
+    println!("{:#?}",v);
+    (v, obs_lines)
+}
+
 pub fn uu_app() -> Command {
     Command::new(uucore::util_name())
         .version(crate_version!())
@@ -357,6 +409,17 @@ impl fmt::Display for StrategyError {
 }
 
 impl Strategy {
+    /// Parse a strategy from obsolete line option value
+    fn from_obs(obs_value: &str) -> Result<Self, StrategyError> {
+        let n = parse_size(obs_value).map_err(StrategyError::Lines)?;
+        if n > 0 {
+            Ok(Self::Lines(n))
+        } else {
+            Err(StrategyError::Lines(ParseSizeError::ParseFailure(
+                obs_value.to_string(),
+            )))
+        }
+    }
     /// Parse a strategy from the command-line arguments.
     fn from(matches: &ArgMatches) -> Result<Self, StrategyError> {
         fn get_and_parse(
@@ -506,7 +569,7 @@ impl fmt::Display for SettingsError {
 
 impl Settings {
     /// Parse a strategy from the command-line arguments.
-    fn from(matches: &ArgMatches) -> Result<Self, SettingsError> {
+    fn from(matches: &ArgMatches, obs_lines: &Option<String>) -> Result<Self, SettingsError> {
         let additional_suffix = matches
             .get_one::<String>(OPT_ADDITIONAL_SUFFIX)
             .unwrap()
@@ -514,7 +577,19 @@ impl Settings {
         if additional_suffix.contains('/') {
             return Err(SettingsError::SuffixContainsSeparator(additional_suffix));
         }
-        let strategy = Strategy::from(matches).map_err(SettingsError::Strategy)?;
+
+        // obsolete lines option cannot be used simultaneously with OPT_LINES
+        let strategy = match obs_lines {
+            Some(obs_value) => {
+                if matches.value_source(OPT_LINES) == Some(ValueSource::CommandLine) {
+                    return Err(SettingsError::Strategy(StrategyError::MultipleWays));
+                } else {
+                    Strategy::from_obs(obs_value).map_err(SettingsError::Strategy)?
+                }
+            }
+            None => Strategy::from(matches).map_err(SettingsError::Strategy)?,
+        };
+
         let (suffix_type, suffix_start) = suffix_type_from(matches)?;
         let suffix_length_str = matches.get_one::<String>(OPT_SUFFIX_LENGTH).unwrap();
         let suffix_length: usize = suffix_length_str
diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs
index c5f32482ed..fe753f0849 100644
--- a/tests/by-util/test_split.rs
+++ b/tests/by-util/test_split.rs
@@ -2,7 +2,7 @@
 //
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
-// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes onehundredlines nbbbb
+// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes onehundredlines nbbbb dxen
 
 use crate::common::util::{AtPath, TestScenario};
 use rand::{thread_rng, Rng, SeedableRng};
@@ -320,6 +320,156 @@ fn test_split_lines_number() {
         .stderr_only("split: invalid number of lines: '2fb'\n");
 }
 
+/// Test for obsolete lines option standalone
+#[test]
+fn test_split_obs_lines_standalone() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "obs-lines-standalone";
+    RandomFile::new(&at, name).add_lines(4);
+    ucmd.args(&["-2", name]).succeeds().no_stderr().no_stdout();
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
+    assert_eq!(glob.count(), 2);
+    assert_eq!(glob.collate(), at.read_bytes(name));
+}
+
+/// Test for invalid obsolete lines option
+#[test]
+fn test_split_invalid_obs_lines_standalone() {
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    at.touch("file");
+
+    scene
+        .ucmd()
+        .args(&["-2fb", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_only("error: unexpected argument '-f' found\n");
+}
+
+/// Test for obsolete lines option as part of combined short options
+#[test]
+fn test_split_obs_lines_within_combined_shorts() {
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    let name = "obs-lines-within-shorts";
+    RandomFile::new(&at, name).add_lines(400);
+
+    scene
+        .ucmd()
+        .args(&["-d200xe", name])
+        .succeeds()
+        .no_stderr()
+        .no_stdout();
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
+    assert_eq!(glob.count(), 2);
+    assert_eq!(glob.collate(), at.read_bytes(name))
+}
+
+/// Test for obsolete lines option starts as part of combined short options
+#[test]
+fn test_split_obs_lines_starts_combined_shorts() {
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    let name = "obs-lines-starts-shorts";
+    RandomFile::new(&at, name).add_lines(400);
+
+    scene
+        .ucmd()
+        .args(&["-x200d", name])
+        .succeeds()
+        .no_stderr()
+        .no_stdout();
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
+    assert_eq!(glob.count(), 2);
+    assert_eq!(glob.collate(), at.read_bytes(name))
+}
+
+/// Test for using both obsolete lines (standalone) option and short/long lines option simultaneously
+#[test]
+fn test_split_both_lines_and_obs_lines_standalone() {
+    // This test will ensure that if both lines option '-l' or '--lines'
+    // and obsolete lines option '-100' are used
+    // it fails
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    at.touch("file");
+
+    scene
+        .ucmd()
+        .args(&["-l", "2", "-2", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: cannot split in more than one way\n");
+    scene
+        .ucmd()
+        .args(&["--lines", "2", "-2", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: cannot split in more than one way\n");
+}
+
+/// Test for using more than one obsolete lines option (standalone)
+/// last one wins
+#[test]
+fn test_split_multiple_obs_lines_standalone() {
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    let name = "multiple-obs-lines";
+    RandomFile::new(&at, name).add_lines(400);
+
+    scene
+        .ucmd()
+        .args(&["-3000", "-200", name])
+        .succeeds()
+        .no_stderr()
+        .no_stdout();
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
+    assert_eq!(glob.count(), 2);
+    assert_eq!(glob.collate(), at.read_bytes(name))
+}
+
+/// Test for using more than one obsolete lines option within combined shorts
+/// last one wins
+#[test]
+fn test_split_multiple_obs_lines_within_combined() {
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    let name = "multiple-obs-lines";
+    RandomFile::new(&at, name).add_lines(400);
+
+    scene
+        .ucmd()
+        .args(&["-x5000d -e200x", name])
+        .succeeds()
+        .no_stderr()
+        .no_stdout();
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
+    assert_eq!(glob.count(), 2);
+    assert_eq!(glob.collate(), at.read_bytes(name))
+}
+
+/// Test for using both obsolete lines option within combined shorts with conflicting -n option simultaneously
+#[test]
+fn test_split_obs_lines_within_combined_with_number() {
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    at.touch("file");
+
+    scene
+        .ucmd()
+        .args(&["-3dxen", "4", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: cannot split in more than one way\n");
+    scene
+        .ucmd()
+        .args(&["-dxe30n", "4", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: cannot split in more than one way\n");
+}
+
 #[test]
 fn test_split_invalid_bytes_size() {
     new_ucmd!()

From 70dd8eb8dcdfffa7b69a22bc9a3162408d912f84 Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Sat, 26 Aug 2023 13:22:36 -0400
Subject: [PATCH 07/36] split: updates to target correct GNU coreutils release

---
 util/build-gnu.sh | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/util/build-gnu.sh b/util/build-gnu.sh
index 587cf85809..f6be73ff47 100755
--- a/util/build-gnu.sh
+++ b/util/build-gnu.sh
@@ -32,10 +32,16 @@ fi
 
 ###
 
+release_tag_GNU="v9.3"
+
 if test ! -d "${path_GNU}"; then
     echo "Could not find GNU coreutils (expected at '${path_GNU}')"
     echo "Run the following to download into the expected path:"
     echo "git clone --recurse-submodules https://github.com/coreutils/coreutils.git \"${path_GNU}\""
+    echo "After downloading GNU coreutils to \"${path_GNU}\" run the following commands to cheout latest release tag"
+    echo "cd \"${path_GNU}\""
+    echo "git fetch --all --tags"
+    echo "git checkout tags/\"${release_tag_GNU}\""
     exit 1
 fi
 
@@ -152,17 +158,8 @@ sed -i -e '/tests\/misc\/invalid-opt.pl/ D' \
 sed -i -e '/tests\/misc\/seq-precision.sh/ D' \
     Makefile
 
-# TODO: many files and some directories modified with 'sed' in the sections below either no longer exist or have been moved
-# TODO: Might need to review and updated sections below
-# TODO: As a result this script will fail when executed normally as 'bash util/build-gnu.hs' due to the 'set -e' set at the beginning 
-# TODO: The rest of the 'sed' commands after first failure in this scenario will not be executed as bash will exit on first error
-# TODO: However, the behaviour might be different when running it via GitHub actions (GnuTests)
-# TODO: For now, when running in local a workaround would be to comment out the 'set -e' at the beginning of the file
-
 # printf doesn't limit the values used in its arg, so this produced ~2GB of output
-# TODO: this is the first one to likely fail as tests/misc/printf.sh does not exist/have been moved
 sed -i '/INT_OFLOW/ D' tests/misc/printf.sh
-# TODO: all commands below might not be executed
 
 # Use the system coreutils where the test fails due to error in a util that is not the one being tested
 sed -i 's|stat|/usr/bin/stat|' tests/touch/60-seconds.sh tests/misc/sort-compress-proc.sh

From eac08f72c2ef97a7d7206fb3f98fc28a447172be Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Sat, 26 Aug 2023 13:24:08 -0400
Subject: [PATCH 08/36] split: double quotes

---
 util/build-gnu.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util/build-gnu.sh b/util/build-gnu.sh
index f6be73ff47..74ec296706 100755
--- a/util/build-gnu.sh
+++ b/util/build-gnu.sh
@@ -41,7 +41,7 @@ if test ! -d "${path_GNU}"; then
     echo "After downloading GNU coreutils to \"${path_GNU}\" run the following commands to cheout latest release tag"
     echo "cd \"${path_GNU}\""
     echo "git fetch --all --tags"
-    echo "git checkout tags/\"${release_tag_GNU}\""
+    echo "git checkout tags/${release_tag_GNU}"
     exit 1
 fi
 

From 4a4759c43c7cd10c81e53f9e352cd9f8f2a5fb63 Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Sat, 26 Aug 2023 13:44:34 -0400
Subject: [PATCH 09/36] split: updated to SYSTEM_TIMEOUT in a few more places

---
 util/build-gnu.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/util/build-gnu.sh b/util/build-gnu.sh
index 74ec296706..4e196debe5 100755
--- a/util/build-gnu.sh
+++ b/util/build-gnu.sh
@@ -173,10 +173,10 @@ sed -i 's|touch |/usr/bin/touch |' tests/cp/reflink-perm.sh tests/ls/block-size.
 sed -i 's|ln -|/usr/bin/ln -|' tests/cp/link-deref.sh
 sed -i 's|cp |/usr/bin/cp |' tests/mv/hard-2.sh
 sed -i 's|paste |/usr/bin/paste |' tests/misc/od-endian.sh
-sed -i 's|timeout |/usr/bin/timeout |' tests/tail-2/follow-stdin.sh
+sed -i 's|timeout |'"${SYSTEM_TIMEOUT}"' |' tests/tail-2/follow-stdin.sh
 
 # Add specific timeout to tests that currently hang to limit time spent waiting
-sed -i 's|\(^\s*\)seq \$|\1/usr/bin/timeout 0.1 seq \$|' tests/misc/seq-precision.sh tests/misc/seq-long-double.sh
+sed -i 's|\(^\s*\)seq \$|\1'"${SYSTEM_TIMEOUT}"' 0.1 seq \$|' tests/misc/seq-precision.sh tests/misc/seq-long-double.sh
 
 # Remove dup of /usr/bin/ when executed several times
 grep -rlE '/usr/bin/\s?/usr/bin' init.cfg tests/* | xargs --no-run-if-empty sed -Ei 's|/usr/bin/\s?/usr/bin/|/usr/bin/|g'

From a384973b1a7e40435e165ab7a65a59ab26da7fd8 Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Sat, 26 Aug 2023 13:51:21 -0400
Subject: [PATCH 10/36] split: remove dup for /usr/local/bin when executed
 several times

---
 util/build-gnu.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/util/build-gnu.sh b/util/build-gnu.sh
index 4e196debe5..ce15dd3f8e 100755
--- a/util/build-gnu.sh
+++ b/util/build-gnu.sh
@@ -180,6 +180,7 @@ sed -i 's|\(^\s*\)seq \$|\1'"${SYSTEM_TIMEOUT}"' 0.1 seq \$|' tests/misc/seq-pre
 
 # Remove dup of /usr/bin/ when executed several times
 grep -rlE '/usr/bin/\s?/usr/bin' init.cfg tests/* | xargs --no-run-if-empty sed -Ei 's|/usr/bin/\s?/usr/bin/|/usr/bin/|g'
+grep -rlE '/usr/local/bin/\s?/usr/local/bin' init.cfg tests/* | xargs --no-run-if-empty sed -Ei 's|/usr/local/bin/\s?/usr/local/bin/|/usr/local/bin/|g'
 
 #### Adjust tests to make them work with Rust/coreutils
 # in some cases, what we are doing in rust/coreutils is good (or better)

From 0edba89b55ba51d974c90645079454f128c2ae6b Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Sat, 26 Aug 2023 13:53:36 -0400
Subject: [PATCH 11/36] split: comments

---
 util/build-gnu.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util/build-gnu.sh b/util/build-gnu.sh
index ce15dd3f8e..57764ed507 100755
--- a/util/build-gnu.sh
+++ b/util/build-gnu.sh
@@ -178,7 +178,7 @@ sed -i 's|timeout |'"${SYSTEM_TIMEOUT}"' |' tests/tail-2/follow-stdin.sh
 # Add specific timeout to tests that currently hang to limit time spent waiting
 sed -i 's|\(^\s*\)seq \$|\1'"${SYSTEM_TIMEOUT}"' 0.1 seq \$|' tests/misc/seq-precision.sh tests/misc/seq-long-double.sh
 
-# Remove dup of /usr/bin/ when executed several times
+# Remove dup of /usr/bin/ and /usr/local/bin/ when executed several times
 grep -rlE '/usr/bin/\s?/usr/bin' init.cfg tests/* | xargs --no-run-if-empty sed -Ei 's|/usr/bin/\s?/usr/bin/|/usr/bin/|g'
 grep -rlE '/usr/local/bin/\s?/usr/local/bin' init.cfg tests/* | xargs --no-run-if-empty sed -Ei 's|/usr/local/bin/\s?/usr/local/bin/|/usr/local/bin/|g'
 

From 2f35989ac34cda92b65328d325bdc615057e2dbe Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Mon, 28 Aug 2023 18:26:48 -0400
Subject: [PATCH 12/36] split: comments

---
 src/uu/split/src/split.rs | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs
index 77c87e7f5c..698c7818c0 100644
--- a/src/uu/split/src/split.rs
+++ b/src/uu/split/src/split.rs
@@ -108,7 +108,6 @@ fn handle_obsolete(args: &[String]) -> (Vec<String>, Option<String>) {
             v.push(arg.to_owned());
         }
     }
-    // println!("{:#?} , {:#?}", v, obs_lines);
     (v, obs_lines)
 }
 
@@ -465,8 +464,7 @@ impl Strategy {
         // Check that the user is not specifying more than one strategy.
         //
         // Note: right now, this exact behavior cannot be handled by
-        // `ArgGroup` since `ArgGroup` considers a default value `Arg`
-        // as "defined".
+        // overrides_with_all() due to obsolete lines value option
         match (
             obs_lines,
             matches.value_source(OPT_LINES) == Some(ValueSource::CommandLine),

From e79753c1cf25db25104e853e52e7f687dd776cfb Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Tue, 29 Aug 2023 13:58:26 -0400
Subject: [PATCH 13/36] split: refactor handle_obsolete() function

---
 src/uu/split/src/split.rs | 71 +++++++++++++++++++++------------------
 1 file changed, 39 insertions(+), 32 deletions(-)

diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs
index 698c7818c0..876d04606f 100644
--- a/src/uu/split/src/split.rs
+++ b/src/uu/split/src/split.rs
@@ -72,43 +72,50 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
 /// `split -x300e -22 file` would mean `split -x -e -l 22 file` (last obsolete lines option wins)
 /// following GNU `split` behavior
 fn handle_obsolete(args: &[String]) -> (Vec<String>, Option<String>) {
-    let mut v: Vec<String> = vec![];
     let mut obs_lines = None;
-    for arg in args.iter() {
-        let slice = &arg;
-        if slice.starts_with('-') && !slice.starts_with("--") {
-            // start of the short option string
-            // extract numeric part and filter it out
-            let mut obs_lines_extracted: Vec<char> = vec![];
-            let filtered_slice: Vec<char> = slice
-                .chars()
-                .filter(|c| {
-                    if c.is_ascii_digit() {
-                        obs_lines_extracted.push(*c);
-                        false
+    let filtered_args = args
+        .iter()
+        .filter_map(|slice| {
+            if slice.starts_with('-') && !slice.starts_with("--") {
+                // start of the short option string
+                // extract numeric part and filter it out
+                let mut obs_lines_extracted: Vec<char> = vec![];
+                let filtered_slice: Vec<char> = slice
+                    .chars()
+                    .filter(|c| {
+                        if c.is_ascii_digit() {
+                            obs_lines_extracted.push(*c);
+                            false
+                        } else {
+                            true
+                        }
+                    })
+                    .collect();
+
+                if obs_lines_extracted.is_empty() {
+                    // no obsolete lines value found/extracted
+                    Some(slice.to_owned())
+                } else {
+                    // obsolete lines value was extracted
+                    obs_lines = Some(obs_lines_extracted.iter().collect());
+                    if filtered_slice.get(1).is_some() {
+                        // there were some short options in front of or after obsolete lines value
+                        // i.e. '-xd100' or '-100de' or similar, which after extraction of obsolete lines value
+                        // would look like '-xd' or '-de' or similar
+                        // preserve it
+                        Some(filtered_slice.iter().collect())
                     } else {
-                        true
+                        None
                     }
-                })
-                .collect();
-
-            if filtered_slice.get(1).is_some() {
-                // there were some short options in front of obsolete lines number
-                // i.e. '-xd100' or similar
+                }
+            } else {
+                // not a short option
                 // preserve it
-                v.push(filtered_slice.iter().collect());
-            }
-            if !obs_lines_extracted.is_empty() {
-                // obsolete lines value was extracted
-                obs_lines = Some(obs_lines_extracted.iter().collect());
+                Some(slice.to_owned())
             }
-        } else {
-            // not a short option
-            // preserve it
-            v.push(arg.to_owned());
-        }
-    }
-    (v, obs_lines)
+        })
+        .collect();
+    (filtered_args, obs_lines)
 }
 
 pub fn uu_app() -> Command {

From 15c7170d20c532889ac8f3470c5d59bc12d10f2a Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Tue, 29 Aug 2023 15:49:47 -0400
Subject: [PATCH 14/36] split: fix for GNU Tests regression + tests

---
 src/uu/split/src/split.rs   | 15 ++++++---
 tests/by-util/test_split.rs | 65 +++++++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs
index 876d04606f..dae24d36b7 100644
--- a/src/uu/split/src/split.rs
+++ b/src/uu/split/src/split.rs
@@ -76,8 +76,16 @@ fn handle_obsolete(args: &[String]) -> (Vec<String>, Option<String>) {
     let filtered_args = args
         .iter()
         .filter_map(|slice| {
-            if slice.starts_with('-') && !slice.starts_with("--") {
+            if slice.starts_with('-')
+                && !slice.starts_with("--")
+                && !slice.starts_with("-a")
+                && !slice.starts_with("-b")
+                && !slice.starts_with("-C")
+                && !slice.starts_with("-l")
+                && !slice.starts_with("-n")
+            {
                 // start of the short option string
+                // that can have obsolete lines option value in it
                 // extract numeric part and filter it out
                 let mut obs_lines_extracted: Vec<char> = vec![];
                 let filtered_slice: Vec<char> = slice
@@ -102,15 +110,14 @@ fn handle_obsolete(args: &[String]) -> (Vec<String>, Option<String>) {
                         // there were some short options in front of or after obsolete lines value
                         // i.e. '-xd100' or '-100de' or similar, which after extraction of obsolete lines value
                         // would look like '-xd' or '-de' or similar
-                        // preserve it
                         Some(filtered_slice.iter().collect())
                     } else {
                         None
                     }
                 }
             } else {
-                // not a short option
-                // preserve it
+                // either not a short option
+                // or a short option that cannot have obsolete lines value in it
                 Some(slice.to_owned())
             }
         })
diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs
index aabfcbe90d..9fba2177e4 100644
--- a/tests/by-util/test_split.rs
+++ b/tests/by-util/test_split.rs
@@ -170,6 +170,22 @@ fn test_split_str_prefixed_chunks_by_bytes() {
     assert_eq!(glob.collate(), at.read_bytes(name));
 }
 
+// Test short bytes option concatenated with value
+#[test]
+fn test_split_by_bytes_short_concatenated_with_value() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "split_by_bytes_short_concatenated_with_value";
+    RandomFile::new(&at, name).add_bytes(10000);
+    ucmd.args(&["-b1000", name]).succeeds();
+
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
+    assert_eq!(glob.count(), 10);
+    for filename in glob.collect() {
+        assert_eq!(glob.directory.metadata(&filename).len(), 1000);
+    }
+    assert_eq!(glob.collate(), at.read_bytes(name));
+}
+
 // This is designed to test what happens when the desired part size is not a
 // multiple of the buffer size and we hopefully don't overshoot the desired part
 // size.
@@ -326,6 +342,19 @@ fn test_split_lines_number() {
         .stderr_only("split: invalid number of lines: 'file'\n");
 }
 
+// Test short lines option with value concatenated
+#[test]
+fn test_split_lines_short_concatenated_with_value() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "split_num_prefixed_chunks_by_lines";
+    RandomFile::new(&at, name).add_lines(10000);
+    ucmd.args(&["-l1000", name]).succeeds();
+
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
+    assert_eq!(glob.count(), 10);
+    assert_eq!(glob.collate(), at.read_bytes(name));
+}
+
 /// Test for obsolete lines option standalone
 #[test]
 fn test_split_obs_lines_standalone() {
@@ -692,6 +721,19 @@ fn test_invalid_suffix_length() {
         .stderr_contains("invalid suffix length: 'xyz'");
 }
 
+// Test short suffix length option with value concatenated
+#[test]
+fn test_split_suffix_length_short_concatenated_with_value() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "split_num_prefixed_chunks_by_lines";
+    RandomFile::new(&at, name).add_lines(10000);
+    ucmd.args(&["-a4", name]).succeeds();
+
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]][[:alpha:]][[:alpha:]]$");
+    assert_eq!(glob.count(), 10);
+    assert_eq!(glob.collate(), at.read_bytes(name));
+}
+
 #[test]
 fn test_include_newlines() {
     let (at, mut ucmd) = at_and_ucmd!();
@@ -710,6 +752,19 @@ fn test_include_newlines() {
     assert_eq!(s, "5\n");
 }
 
+// Test short number of chunks option concatenated with value
+#[test]
+fn test_split_number_chunks_short_concatenated_with_value() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["-n3", "threebytes.txt"])
+        .succeeds()
+        .no_stdout()
+        .no_stderr();
+    assert_eq!(at.read("xaa"), "a");
+    assert_eq!(at.read("xab"), "b");
+    assert_eq!(at.read("xac"), "c");
+}
+
 #[test]
 fn test_allow_empty_files() {
     let (at, mut ucmd) = at_and_ucmd!();
@@ -784,6 +839,16 @@ fn test_line_bytes() {
     assert_eq!(at.read("xad"), "ee\n");
 }
 
+#[test]
+fn test_line_bytes_concatenated_with_value() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["-C8", "letters.txt"]).succeeds();
+    assert_eq!(at.read("xaa"), "aaaaaaaa");
+    assert_eq!(at.read("xab"), "a\nbbbb\n");
+    assert_eq!(at.read("xac"), "cccc\ndd\n");
+    assert_eq!(at.read("xad"), "ee\n");
+}
+
 #[test]
 fn test_line_bytes_no_final_newline() {
     let (at, mut ucmd) = at_and_ucmd!();

From 7f905a3b8d6d08c271d71b25dcba3d9559f83959 Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Tue, 29 Aug 2023 16:35:00 -0400
Subject: [PATCH 15/36] split: edge case for obs lines within combined shorts +
 test

---
 src/uu/split/src/split.rs   |  9 ++++++++-
 tests/by-util/test_split.rs | 21 +++++++++++++++++----
 2 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs
index dae24d36b7..afe635456b 100644
--- a/src/uu/split/src/split.rs
+++ b/src/uu/split/src/split.rs
@@ -88,13 +88,20 @@ fn handle_obsolete(args: &[String]) -> (Vec<String>, Option<String>) {
                 // that can have obsolete lines option value in it
                 // extract numeric part and filter it out
                 let mut obs_lines_extracted: Vec<char> = vec![];
+                let mut obs_lines_end_reached = false;
                 let filtered_slice: Vec<char> = slice
                     .chars()
                     .filter(|c| {
-                        if c.is_ascii_digit() {
+                        // To correctly process scenario like '-x200a4'
+                        // we need to stop extracting digits once alphabetic character is encountered
+                        // after we already have something in obs_lines_extracted
+                        if c.is_ascii_digit() && !obs_lines_end_reached {
                             obs_lines_extracted.push(*c);
                             false
                         } else {
+                            if !obs_lines_extracted.is_empty() {
+                                obs_lines_end_reached = true;
+                            }
                             true
                         }
                     })
diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs
index 9fba2177e4..3280fff675 100644
--- a/tests/by-util/test_split.rs
+++ b/tests/by-util/test_split.rs
@@ -170,7 +170,7 @@ fn test_split_str_prefixed_chunks_by_bytes() {
     assert_eq!(glob.collate(), at.read_bytes(name));
 }
 
-// Test short bytes option concatenated with value
+/// Test short bytes option concatenated with value
 #[test]
 fn test_split_by_bytes_short_concatenated_with_value() {
     let (at, mut ucmd) = at_and_ucmd!();
@@ -342,7 +342,7 @@ fn test_split_lines_number() {
         .stderr_only("split: invalid number of lines: 'file'\n");
 }
 
-// Test short lines option with value concatenated
+/// Test short lines option with value concatenated
 #[test]
 fn test_split_lines_short_concatenated_with_value() {
     let (at, mut ucmd) = at_and_ucmd!();
@@ -401,6 +401,19 @@ fn test_split_obs_lines_within_combined_shorts() {
     assert_eq!(glob.collate(), at.read_bytes(name))
 }
 
+/// Test for obsolete lines option as part of combined short options with tailing suffix length with value
+#[test]
+fn test_split_obs_lines_within_combined_shorts_tailing_suffix_length() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "obs-lines-combined-shorts-tailing-suffix-length";
+    RandomFile::new(&at, name).add_lines(1000);
+    ucmd.args(&["-d200a4", name]).succeeds();
+
+    let glob = Glob::new(&at, ".", r"x\d\d\d\d$");
+    assert_eq!(glob.count(), 5);
+    assert_eq!(glob.collate(), at.read_bytes(name));
+}
+
 /// Test for obsolete lines option starts as part of combined short options
 #[test]
 fn test_split_obs_lines_starts_combined_shorts() {
@@ -721,7 +734,7 @@ fn test_invalid_suffix_length() {
         .stderr_contains("invalid suffix length: 'xyz'");
 }
 
-// Test short suffix length option with value concatenated
+/// Test short suffix length option with value concatenated
 #[test]
 fn test_split_suffix_length_short_concatenated_with_value() {
     let (at, mut ucmd) = at_and_ucmd!();
@@ -752,7 +765,7 @@ fn test_include_newlines() {
     assert_eq!(s, "5\n");
 }
 
-// Test short number of chunks option concatenated with value
+/// Test short number of chunks option concatenated with value
 #[test]
 fn test_split_number_chunks_short_concatenated_with_value() {
     let (at, mut ucmd) = at_and_ucmd!();

From b2ebe6a1d180fca68b5891ecf542f956e7df8331 Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Wed, 30 Aug 2023 11:15:11 -0400
Subject: [PATCH 16/36] build-gnu.sh: target GNU release v9.4

---
 util/build-gnu.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util/build-gnu.sh b/util/build-gnu.sh
index a6d8de29ef..21a231bc01 100755
--- a/util/build-gnu.sh
+++ b/util/build-gnu.sh
@@ -32,7 +32,7 @@ fi
 
 ###
 
-release_tag_GNU="v9.3"
+release_tag_GNU="v9.4"
 
 if test ! -d "${path_GNU}"; then
     echo "Could not find GNU coreutils (expected at '${path_GNU}')"

From 6f37b4b4cfcbe45725d91d63dadabd1aa9873a2b Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Wed, 30 Aug 2023 19:29:57 -0400
Subject: [PATCH 17/36] split: hyphenated values + tests

---
 src/uu/split/src/split.rs   | 50 ++++++++++++++++++++---
 tests/by-util/test_split.rs | 81 ++++++++++++++++++++++++++++++++++---
 2 files changed, 120 insertions(+), 11 deletions(-)

diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs
index afe635456b..7834dfe682 100644
--- a/src/uu/split/src/split.rs
+++ b/src/uu/split/src/split.rs
@@ -73,11 +73,18 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
 /// following GNU `split` behavior
 fn handle_obsolete(args: &[String]) -> (Vec<String>, Option<String>) {
     let mut obs_lines = None;
+    let mut preceding_long_opt_req_value = false;
+    let mut preceding_short_opt_req_value = false;
     let filtered_args = args
         .iter()
         .filter_map(|slice| {
+            let filter: Option<String>;
+            // check if the slice is a true short option (and not hyphen prefixed value of an option)
+            // and if so, a short option that can contain obsolete lines value
             if slice.starts_with('-')
                 && !slice.starts_with("--")
+                && !preceding_long_opt_req_value
+                && !preceding_short_opt_req_value
                 && !slice.starts_with("-a")
                 && !slice.starts_with("-b")
                 && !slice.starts_with("-C")
@@ -109,7 +116,7 @@ fn handle_obsolete(args: &[String]) -> (Vec<String>, Option<String>) {
 
                 if obs_lines_extracted.is_empty() {
                     // no obsolete lines value found/extracted
-                    Some(slice.to_owned())
+                    filter = Some(slice.to_owned());
                 } else {
                     // obsolete lines value was extracted
                     obs_lines = Some(obs_lines_extracted.iter().collect());
@@ -117,16 +124,41 @@ fn handle_obsolete(args: &[String]) -> (Vec<String>, Option<String>) {
                         // there were some short options in front of or after obsolete lines value
                         // i.e. '-xd100' or '-100de' or similar, which after extraction of obsolete lines value
                         // would look like '-xd' or '-de' or similar
-                        Some(filtered_slice.iter().collect())
+                        filter = Some(filtered_slice.iter().collect());
                     } else {
-                        None
+                        filter = None;
                     }
                 }
             } else {
                 // either not a short option
                 // or a short option that cannot have obsolete lines value in it
-                Some(slice.to_owned())
+                filter = Some(slice.to_owned());
             }
+            // capture if current slice is a preceding long option that requires value and does not use '=' to assign that value
+            // following slice should be treaded as value for this option
+            // even if it starts with '-' (which would be treated as hyphen prefixed value)
+            if slice.starts_with("--") {
+                preceding_long_opt_req_value = &slice[2..] == OPT_BYTES
+                    || &slice[2..] == OPT_LINE_BYTES
+                    || &slice[2..] == OPT_LINES
+                    || &slice[2..] == OPT_ADDITIONAL_SUFFIX
+                    || &slice[2..] == OPT_FILTER
+                    || &slice[2..] == OPT_NUMBER
+                    || &slice[2..] == OPT_SUFFIX_LENGTH;
+            }
+            // capture if current slice is a preceding short option that requires value and does not have value in the same slice (value separated by whitespace)
+            // following slice should be treaded as value for this option
+            // even if it starts with '-' (which would be treated as hyphen prefixed value)
+            preceding_short_opt_req_value =
+                slice == "-b" || slice == "-C" || slice == "-l" || slice == "-n" || slice == "-a";
+            // slice is a value
+            // reset preceding option flags
+            if !slice.starts_with('-') {
+                preceding_short_opt_req_value = false;
+                preceding_long_opt_req_value = false;
+            }
+            // return filter
+            filter
         })
         .collect();
     (filtered_args, obs_lines)
@@ -144,6 +176,7 @@ pub fn uu_app() -> Command {
             Arg::new(OPT_BYTES)
                 .short('b')
                 .long(OPT_BYTES)
+                .allow_hyphen_values(true)
                 .value_name("SIZE")
                 .help("put SIZE bytes per output file"),
         )
@@ -151,14 +184,15 @@ pub fn uu_app() -> Command {
             Arg::new(OPT_LINE_BYTES)
                 .short('C')
                 .long(OPT_LINE_BYTES)
+                .allow_hyphen_values(true)
                 .value_name("SIZE")
-                .default_value("2")
                 .help("put at most SIZE bytes of lines per output file"),
         )
         .arg(
             Arg::new(OPT_LINES)
                 .short('l')
                 .long(OPT_LINES)
+                .allow_hyphen_values(true)
                 .value_name("NUMBER")
                 .default_value("1000")
                 .help("put NUMBER lines/records per output file"),
@@ -167,6 +201,7 @@ pub fn uu_app() -> Command {
             Arg::new(OPT_NUMBER)
                 .short('n')
                 .long(OPT_NUMBER)
+                .allow_hyphen_values(true)
                 .value_name("CHUNKS")
                 .help("generate CHUNKS output files; see explanation below"),
         )
@@ -174,6 +209,7 @@ pub fn uu_app() -> Command {
         .arg(
             Arg::new(OPT_ADDITIONAL_SUFFIX)
                 .long(OPT_ADDITIONAL_SUFFIX)
+                .allow_hyphen_values(true)
                 .value_name("SUFFIX")
                 .default_value("")
                 .help("additional SUFFIX to append to output file names"),
@@ -181,6 +217,7 @@ pub fn uu_app() -> Command {
         .arg(
             Arg::new(OPT_FILTER)
                 .long(OPT_FILTER)
+                .allow_hyphen_values(true)
                 .value_name("COMMAND")
                 .value_hint(clap::ValueHint::CommandName)
                 .help(
@@ -250,9 +287,10 @@ pub fn uu_app() -> Command {
             Arg::new(OPT_SUFFIX_LENGTH)
                 .short('a')
                 .long(OPT_SUFFIX_LENGTH)
+                .allow_hyphen_values(true)
                 .value_name("N")
                 .default_value(OPT_DEFAULT_SUFFIX_LENGTH)
-                .help("use suffixes of fixed length N. 0 implies dynamic length."),
+                .help("use suffixes of fixed length N. 0 implies dynamic length, starting with 2"),
         )
         .arg(
             Arg::new(OPT_VERBOSE)
diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs
index 3280fff675..466dabda9a 100644
--- a/tests/by-util/test_split.rs
+++ b/tests/by-util/test_split.rs
@@ -254,6 +254,18 @@ fn test_additional_suffix_no_slash() {
         .usage_error("invalid suffix 'a/b', contains directory separator");
 }
 
+#[test]
+fn test_split_additional_suffix_hyphen_value() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "split_additional_suffix";
+    RandomFile::new(&at, name).add_lines(2000);
+    ucmd.args(&["--additional-suffix", "-300", name]).succeeds();
+
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]-300$");
+    assert_eq!(glob.count(), 2);
+    assert_eq!(glob.collate(), at.read_bytes(name));
+}
+
 // note: the test_filter* tests below are unix-only
 // windows support has been waived for now because of the difficulty of getting
 // the `cmd` call right
@@ -436,9 +448,9 @@ fn test_split_obs_lines_starts_combined_shorts() {
 /// Test for using both obsolete lines (standalone) option and short/long lines option simultaneously
 #[test]
 fn test_split_both_lines_and_obs_lines_standalone() {
-    // This test will ensure that if both lines option '-l' or '--lines'
-    // and obsolete lines option '-100' are used
-    // it fails
+    // This test will ensure that:
+    // if both lines option '-l' or '--lines' (with value) and obsolete lines option '-100' are used - it fails
+    // if standalone lines option is used incorrectly and treated as a hyphen prefixed value of other option - it fails
     let scene = TestScenario::new(util_name!());
     let at = &scene.fixtures;
     at.touch("file");
@@ -455,18 +467,77 @@ fn test_split_both_lines_and_obs_lines_standalone() {
         .fails()
         .code_is(1)
         .stderr_contains("split: cannot split in more than one way\n");
+}
+
+/// Test for using obsolete lines option incorrectly, so it is treated as a hyphen prefixed value of other option
+#[test]
+fn test_split_obs_lines_as_other_option_value() {
+    // This test will ensure that:
+    // if obsolete lines option is used incorrectly and treated as a hyphen prefixed value of other option - it fails
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    at.touch("file");
+
     scene
         .ucmd()
         .args(&["--lines", "-200", "file"])
         .fails()
         .code_is(1)
-        .stderr_contains("split: cannot split in more than one way\n");
+        .stderr_contains("split: invalid number of lines: '-200'\n");
     scene
         .ucmd()
         .args(&["-l", "-200", "file"])
         .fails()
         .code_is(1)
-        .stderr_contains("split: cannot split in more than one way\n");
+        .stderr_contains("split: invalid number of lines: '-200'\n");
+    scene
+        .ucmd()
+        .args(&["-a", "-200", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid suffix length: '-200'\n");
+    scene
+        .ucmd()
+        .args(&["--suffix-length", "-d200e", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid suffix length: '-d200e'\n");
+    scene
+        .ucmd()
+        .args(&["-C", "-200", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid number of bytes: '-200'\n");
+    scene
+        .ucmd()
+        .args(&["--line-bytes", "-x200a4", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid number of bytes: '-x200a4'\n");
+    scene
+        .ucmd()
+        .args(&["-b", "-200", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid number of bytes: '-200'\n");
+    scene
+        .ucmd()
+        .args(&["--bytes", "-200xd", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid number of bytes: '-200xd'\n");
+    scene
+        .ucmd()
+        .args(&["-n", "-200", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid number of chunks: -200\n");
+    scene
+        .ucmd()
+        .args(&["--number", "-e200", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid number of chunks: -e200\n");
 }
 
 /// Test for using more than one obsolete lines option (standalone)

From 843540d05fafe427ed34d7b8b546357d327d3df5 Mon Sep 17 00:00:00 2001
From: Sylvestre Ledru <sledru@mozilla.com>
Date: Thu, 31 Aug 2023 09:21:59 +0200
Subject: [PATCH 18/36] fix a typo

---
 util/build-gnu.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util/build-gnu.sh b/util/build-gnu.sh
index 21a231bc01..157265f61b 100755
--- a/util/build-gnu.sh
+++ b/util/build-gnu.sh
@@ -38,7 +38,7 @@ if test ! -d "${path_GNU}"; then
     echo "Could not find GNU coreutils (expected at '${path_GNU}')"
     echo "Run the following to download into the expected path:"
     echo "git clone --recurse-submodules https://github.com/coreutils/coreutils.git \"${path_GNU}\""
-    echo "After downloading GNU coreutils to \"${path_GNU}\" run the following commands to cheout latest release tag"
+    echo "After downloading GNU coreutils to \"${path_GNU}\" run the following commands to checkout latest release tag"
     echo "cd \"${path_GNU}\""
     echo "git fetch --all --tags"
     echo "git checkout tags/${release_tag_GNU}"

From 5bfe9b19ef775adf40c3e915a731611dbc4a6256 Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Thu, 31 Aug 2023 14:46:56 -0400
Subject: [PATCH 19/36] split: avoid using `collect_lossy` + test for invalid
 UTF8 arguments

---
 src/uu/split/src/split.rs   | 167 +++++++++++++++++++-----------------
 tests/by-util/test_split.rs |  45 ++++++++++
 2 files changed, 134 insertions(+), 78 deletions(-)

diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs
index 7834dfe682..68692d03c0 100644
--- a/src/uu/split/src/split.rs
+++ b/src/uu/split/src/split.rs
@@ -13,6 +13,7 @@ use crate::filenames::FilenameIterator;
 use crate::filenames::SuffixType;
 use clap::{crate_version, parser::ValueSource, Arg, ArgAction, ArgMatches, Command};
 use std::env;
+use std::ffi::OsString;
 use std::fmt;
 use std::fs::{metadata, File};
 use std::io;
@@ -52,9 +53,8 @@ const AFTER_HELP: &str = help_section!("after help", "split.md");
 
 #[uucore::main]
 pub fn uumain(args: impl uucore::Args) -> UResult<()> {
-    let args = args.collect_lossy();
-
-    let (args, obs_lines) = handle_obsolete(&args[..]);
+    
+    let (args, obs_lines) = handle_obsolete(args);
 
     let matches = uu_app().try_get_matches_from(args)?;
 
@@ -71,91 +71,102 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
 /// `split -x300e file` would mean `split -x -l 300 -e file`
 /// `split -x300e -22 file` would mean `split -x -e -l 22 file` (last obsolete lines option wins)
 /// following GNU `split` behavior
-fn handle_obsolete(args: &[String]) -> (Vec<String>, Option<String>) {
+fn handle_obsolete(args: impl uucore::Args) -> (Vec<OsString>, Option<String>) {
     let mut obs_lines = None;
     let mut preceding_long_opt_req_value = false;
     let mut preceding_short_opt_req_value = false;
     let filtered_args = args
-        .iter()
-        .filter_map(|slice| {
-            let filter: Option<String>;
-            // check if the slice is a true short option (and not hyphen prefixed value of an option)
-            // and if so, a short option that can contain obsolete lines value
-            if slice.starts_with('-')
-                && !slice.starts_with("--")
-                && !preceding_long_opt_req_value
-                && !preceding_short_opt_req_value
-                && !slice.starts_with("-a")
-                && !slice.starts_with("-b")
-                && !slice.starts_with("-C")
-                && !slice.starts_with("-l")
-                && !slice.starts_with("-n")
-            {
-                // start of the short option string
-                // that can have obsolete lines option value in it
-                // extract numeric part and filter it out
-                let mut obs_lines_extracted: Vec<char> = vec![];
-                let mut obs_lines_end_reached = false;
-                let filtered_slice: Vec<char> = slice
-                    .chars()
-                    .filter(|c| {
-                        // To correctly process scenario like '-x200a4'
-                        // we need to stop extracting digits once alphabetic character is encountered
-                        // after we already have something in obs_lines_extracted
-                        if c.is_ascii_digit() && !obs_lines_end_reached {
-                            obs_lines_extracted.push(*c);
-                            false
-                        } else {
-                            if !obs_lines_extracted.is_empty() {
-                                obs_lines_end_reached = true;
+        .filter_map(|os_slice| {
+            let filter: Option<OsString>;
+            if let Some(slice) = os_slice.to_str() {
+                // check if the slice is a true short option (and not hyphen prefixed value of an option)
+                // and if so, a short option that can contain obsolete lines value
+                if slice.starts_with('-')
+                    && !slice.starts_with("--")
+                    && !preceding_long_opt_req_value
+                    && !preceding_short_opt_req_value
+                    && !slice.starts_with("-a")
+                    && !slice.starts_with("-b")
+                    && !slice.starts_with("-C")
+                    && !slice.starts_with("-l")
+                    && !slice.starts_with("-n")
+                {
+                    // start of the short option string
+                    // that can have obsolete lines option value in it
+                    // extract numeric part and filter it out
+                    let mut obs_lines_extracted: Vec<char> = vec![];
+                    let mut obs_lines_end_reached = false;
+                    let filtered_slice: Vec<char> = slice
+                        .chars()
+                        .filter(|c| {
+                            // To correctly process scenario like '-x200a4'
+                            // we need to stop extracting digits once alphabetic character is encountered
+                            // after we already have something in obs_lines_extracted
+                            if c.is_ascii_digit() && !obs_lines_end_reached {
+                                obs_lines_extracted.push(*c);
+                                false
+                            } else {
+                                if !obs_lines_extracted.is_empty() {
+                                    obs_lines_end_reached = true;
+                                }
+                                true
                             }
-                            true
-                        }
-                    })
-                    .collect();
+                        })
+                        .collect();
 
-                if obs_lines_extracted.is_empty() {
-                    // no obsolete lines value found/extracted
-                    filter = Some(slice.to_owned());
-                } else {
-                    // obsolete lines value was extracted
-                    obs_lines = Some(obs_lines_extracted.iter().collect());
-                    if filtered_slice.get(1).is_some() {
-                        // there were some short options in front of or after obsolete lines value
-                        // i.e. '-xd100' or '-100de' or similar, which after extraction of obsolete lines value
-                        // would look like '-xd' or '-de' or similar
-                        filter = Some(filtered_slice.iter().collect());
+                    if obs_lines_extracted.is_empty() {
+                        // no obsolete lines value found/extracted
+                        filter = Some(OsString::from(slice));
                     } else {
-                        filter = None;
+                        // obsolete lines value was extracted
+                        obs_lines = Some(obs_lines_extracted.iter().collect());
+                        if filtered_slice.get(1).is_some() {
+                            // there were some short options in front of or after obsolete lines value
+                            // i.e. '-xd100' or '-100de' or similar, which after extraction of obsolete lines value
+                            // would look like '-xd' or '-de' or similar
+                            let filtered_slice: String = filtered_slice.iter().collect();
+                            filter = Some(OsString::from(filtered_slice));
+                        } else {
+                            filter = None;
+                        }
                     }
+                } else {
+                    // either not a short option
+                    // or a short option that cannot have obsolete lines value in it
+                    filter = Some(OsString::from(slice));
+                }
+                // capture if current slice is a preceding long option that requires value and does not use '=' to assign that value
+                // following slice should be treaded as value for this option
+                // even if it starts with '-' (which would be treated as hyphen prefixed value)
+                if slice.starts_with("--") {
+                    preceding_long_opt_req_value = &slice[2..] == OPT_BYTES
+                        || &slice[2..] == OPT_LINE_BYTES
+                        || &slice[2..] == OPT_LINES
+                        || &slice[2..] == OPT_ADDITIONAL_SUFFIX
+                        || &slice[2..] == OPT_FILTER
+                        || &slice[2..] == OPT_NUMBER
+                        || &slice[2..] == OPT_SUFFIX_LENGTH;
+                }
+                // capture if current slice is a preceding short option that requires value and does not have value in the same slice (value separated by whitespace)
+                // following slice should be treaded as value for this option
+                // even if it starts with '-' (which would be treated as hyphen prefixed value)
+                preceding_short_opt_req_value = slice == "-b"
+                    || slice == "-C"
+                    || slice == "-l"
+                    || slice == "-n"
+                    || slice == "-a";
+                // slice is a value
+                // reset preceding option flags
+                if !slice.starts_with('-') {
+                    preceding_short_opt_req_value = false;
+                    preceding_long_opt_req_value = false;
                 }
             } else {
-                // either not a short option
-                // or a short option that cannot have obsolete lines value in it
-                filter = Some(slice.to_owned());
-            }
-            // capture if current slice is a preceding long option that requires value and does not use '=' to assign that value
-            // following slice should be treaded as value for this option
-            // even if it starts with '-' (which would be treated as hyphen prefixed value)
-            if slice.starts_with("--") {
-                preceding_long_opt_req_value = &slice[2..] == OPT_BYTES
-                    || &slice[2..] == OPT_LINE_BYTES
-                    || &slice[2..] == OPT_LINES
-                    || &slice[2..] == OPT_ADDITIONAL_SUFFIX
-                    || &slice[2..] == OPT_FILTER
-                    || &slice[2..] == OPT_NUMBER
-                    || &slice[2..] == OPT_SUFFIX_LENGTH;
-            }
-            // capture if current slice is a preceding short option that requires value and does not have value in the same slice (value separated by whitespace)
-            // following slice should be treaded as value for this option
-            // even if it starts with '-' (which would be treated as hyphen prefixed value)
-            preceding_short_opt_req_value =
-                slice == "-b" || slice == "-C" || slice == "-l" || slice == "-n" || slice == "-a";
-            // slice is a value
-            // reset preceding option flags
-            if !slice.starts_with('-') {
-                preceding_short_opt_req_value = false;
-                preceding_long_opt_req_value = false;
+                // Cannot cleanly convert os_slice to UTF-8
+                // Do not process and return as-is
+                // This will cause failure later on, but we should not handle it here
+                // and let clap panic on invalid UTF-8 argument
+                filter = Some(os_slice);
             }
             // return filter
             filter
diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs
index 466dabda9a..d24d2cf54d 100644
--- a/tests/by-util/test_split.rs
+++ b/tests/by-util/test_split.rs
@@ -9,6 +9,7 @@ use rand::{thread_rng, Rng, SeedableRng};
 use regex::Regex;
 #[cfg(not(windows))]
 use std::env;
+use std::ffi::OsStr;
 use std::path::Path;
 use std::{
     fs::{read_dir, File},
@@ -1287,3 +1288,47 @@ fn test_split_invalid_input() {
         .no_stdout()
         .stderr_contains("split: invalid number of chunks: 0");
 }
+
+/// Test if there are invalid (non UTF-8) in the arguments - unix
+/// clap is expected to fail/panic
+#[cfg(unix)]
+#[test]
+fn test_split_non_utf8_argument_unix() {
+    use std::os::unix::ffi::OsStrExt;
+
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "test_split_non_utf8_argument";
+    let opt = OsStr::from_bytes("--additional-suffix".as_bytes());
+    RandomFile::new(&at, name).add_lines(2000);
+    // Here, the values 0x66 and 0x6f correspond to 'f' and 'o'
+    // respectively. The value 0x80 is a lone continuation byte, invalid
+    // in a UTF-8 sequence.
+    let opt_value = [0x66, 0x6f, 0x80, 0x6f];
+    let opt_value = OsStr::from_bytes(&opt_value[..]);
+    let name = OsStr::from_bytes(name.as_bytes());
+    ucmd.args(&[opt, opt_value, name])
+        .fails()
+        .stderr_contains("error: invalid UTF-8 was detected in one or more arguments");
+}
+
+/// Test if there are invalid (non UTF-8) in the arguments - windows
+/// clap is expected to fail/panic
+#[cfg(windows)]
+#[test]
+fn test_split_non_utf8_argument_windows() {
+    use std::os::windows::prelude::*;
+
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "test_split_non_utf8_argument";
+    let opt = OsStr::from_bytes("--additional-suffix".as_bytes());
+    RandomFile::new(&at, name).add_lines(2000);
+    // Here the values 0x0066 and 0x006f correspond to 'f' and 'o'
+    // respectively. The value 0xD800 is a lone surrogate half, invalid
+    // in a UTF-16 sequence.
+    let opt_value = [0x0066, 0x006f, 0xD800, 0x006f];
+    let opt_value = OsString::from_wide(&opt_value[..]);
+    let name = OsStr::from_bytes(name.as_bytes());
+    ucmd.args(&[opt, opt_value, name])
+        .fails()
+        .stderr_contains("error: invalid UTF-8 was detected in one or more arguments");
+}

From 271a108fa9e1ea75ed8c6e42c2a207a6da935c81 Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Thu, 31 Aug 2023 15:37:42 -0400
Subject: [PATCH 20/36] split: formatting

---
 src/uu/split/src/split.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs
index 68692d03c0..d76bfb2de7 100644
--- a/src/uu/split/src/split.rs
+++ b/src/uu/split/src/split.rs
@@ -53,7 +53,6 @@ const AFTER_HELP: &str = help_section!("after help", "split.md");
 
 #[uucore::main]
 pub fn uumain(args: impl uucore::Args) -> UResult<()> {
-    
     let (args, obs_lines) = handle_obsolete(args);
 
     let matches = uu_app().try_get_matches_from(args)?;

From d2812cbbc387b7dc3ad8f5f31863aad09ce1e6ad Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Thu, 31 Aug 2023 16:04:44 -0400
Subject: [PATCH 21/36] split: disable windows test for invalid UTF8

---
 tests/by-util/test_split.rs | 46 +++++++++++++++++++------------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs
index d24d2cf54d..aef9ea040e 100644
--- a/tests/by-util/test_split.rs
+++ b/tests/by-util/test_split.rs
@@ -9,7 +9,6 @@ use rand::{thread_rng, Rng, SeedableRng};
 use regex::Regex;
 #[cfg(not(windows))]
 use std::env;
-use std::ffi::OsStr;
 use std::path::Path;
 use std::{
     fs::{read_dir, File},
@@ -1294,6 +1293,7 @@ fn test_split_invalid_input() {
 #[cfg(unix)]
 #[test]
 fn test_split_non_utf8_argument_unix() {
+    use std::ffi::OsStr;
     use std::os::unix::ffi::OsStrExt;
 
     let (at, mut ucmd) = at_and_ucmd!();
@@ -1311,24 +1311,26 @@ fn test_split_non_utf8_argument_unix() {
         .stderr_contains("error: invalid UTF-8 was detected in one or more arguments");
 }
 
-/// Test if there are invalid (non UTF-8) in the arguments - windows
-/// clap is expected to fail/panic
-#[cfg(windows)]
-#[test]
-fn test_split_non_utf8_argument_windows() {
-    use std::os::windows::prelude::*;
-
-    let (at, mut ucmd) = at_and_ucmd!();
-    let name = "test_split_non_utf8_argument";
-    let opt = OsStr::from_bytes("--additional-suffix".as_bytes());
-    RandomFile::new(&at, name).add_lines(2000);
-    // Here the values 0x0066 and 0x006f correspond to 'f' and 'o'
-    // respectively. The value 0xD800 is a lone surrogate half, invalid
-    // in a UTF-16 sequence.
-    let opt_value = [0x0066, 0x006f, 0xD800, 0x006f];
-    let opt_value = OsString::from_wide(&opt_value[..]);
-    let name = OsStr::from_bytes(name.as_bytes());
-    ucmd.args(&[opt, opt_value, name])
-        .fails()
-        .stderr_contains("error: invalid UTF-8 was detected in one or more arguments");
-}
+// Test if there are invalid (non UTF-8) in the arguments - windows
+// clap is expected to fail/panic
+// comment it out for now
+// #[cfg(windows)]
+// #[test]
+// fn test_split_non_utf8_argument_windows() {
+//     use std::ffi::OsString;
+//     use std::os::windows::prelude::*;
+
+//     let (at, mut ucmd) = at_and_ucmd!();
+//     let name = "test_split_non_utf8_argument";
+//     let opt = OsStr::from_bytes("--additional-suffix".as_bytes());
+//     RandomFile::new(&at, name).add_lines(2000);
+//     // Here the values 0x0066 and 0x006f correspond to 'f' and 'o'
+//     // respectively. The value 0xD800 is a lone surrogate half, invalid
+//     // in a UTF-16 sequence.
+//     let opt_value = [0x0066, 0x006f, 0xD800, 0x006f];
+//     let opt_value = OsString::from_wide(&opt_value[..]);
+//     let name = OsStr::from_bytes(name.as_bytes());
+//     ucmd.args(&[opt, opt_value, name])
+//         .fails()
+//         .stderr_contains("error: invalid UTF-8 was detected in one or more arguments");
+// }

From e597189be7555164fc7f57df0b03f1ae788901ea Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Thu, 31 Aug 2023 20:48:44 -0400
Subject: [PATCH 22/36] split: fixed windows test for invalid unicode args

---
 tests/by-util/test_split.rs | 47 ++++++++++++++++++-------------------
 1 file changed, 23 insertions(+), 24 deletions(-)

diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs
index aef9ea040e..965f2733e2 100644
--- a/tests/by-util/test_split.rs
+++ b/tests/by-util/test_split.rs
@@ -1290,8 +1290,8 @@ fn test_split_invalid_input() {
 
 /// Test if there are invalid (non UTF-8) in the arguments - unix
 /// clap is expected to fail/panic
-#[cfg(unix)]
 #[test]
+#[cfg(unix)]
 fn test_split_non_utf8_argument_unix() {
     use std::ffi::OsStr;
     use std::os::unix::ffi::OsStrExt;
@@ -1311,26 +1311,25 @@ fn test_split_non_utf8_argument_unix() {
         .stderr_contains("error: invalid UTF-8 was detected in one or more arguments");
 }
 
-// Test if there are invalid (non UTF-8) in the arguments - windows
-// clap is expected to fail/panic
-// comment it out for now
-// #[cfg(windows)]
-// #[test]
-// fn test_split_non_utf8_argument_windows() {
-//     use std::ffi::OsString;
-//     use std::os::windows::prelude::*;
-
-//     let (at, mut ucmd) = at_and_ucmd!();
-//     let name = "test_split_non_utf8_argument";
-//     let opt = OsStr::from_bytes("--additional-suffix".as_bytes());
-//     RandomFile::new(&at, name).add_lines(2000);
-//     // Here the values 0x0066 and 0x006f correspond to 'f' and 'o'
-//     // respectively. The value 0xD800 is a lone surrogate half, invalid
-//     // in a UTF-16 sequence.
-//     let opt_value = [0x0066, 0x006f, 0xD800, 0x006f];
-//     let opt_value = OsString::from_wide(&opt_value[..]);
-//     let name = OsStr::from_bytes(name.as_bytes());
-//     ucmd.args(&[opt, opt_value, name])
-//         .fails()
-//         .stderr_contains("error: invalid UTF-8 was detected in one or more arguments");
-// }
+/// Test if there are invalid (non UTF-8) in the arguments - windows
+/// clap is expected to fail/panic
+#[test]
+#[cfg(windows)]
+fn test_split_non_utf8_argument_windows() {
+    use std::ffi::OsString;
+    use std::os::windows::ffi::OsStringExt;
+
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "test_split_non_utf8_argument";
+    let opt = OsString::from("--additional-suffix");
+    RandomFile::new(&at, name).add_lines(2000);
+    // Here the values 0x0066 and 0x006f correspond to 'f' and 'o'
+    // respectively. The value 0xD800 is a lone surrogate half, invalid
+    // in a UTF-16 sequence.
+    let opt_value = [0x0066, 0x006f, 0xD800, 0x006f];
+    let opt_value = OsString::from_wide(&opt_value[..]);
+    let name = OsString::from(name);
+    ucmd.args(&[opt, opt_value, name])
+        .fails()
+        .stderr_contains("error: invalid UTF-8 was detected in one or more arguments");
+}

From 636c2bb7ae9444551d900d50829eb87414a23f8f Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Mon, 4 Sep 2023 12:05:26 -0400
Subject: [PATCH 23/36] uucore: parse_size_max and split

---
 src/uu/split/src/split.rs               | 28 ++++++++--------------
 src/uucore/src/lib/parser/parse_size.rs | 32 +++++++++++++++++++++++--
 2 files changed, 40 insertions(+), 20 deletions(-)

diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs
index d76bfb2de7..e39f6e93ff 100644
--- a/src/uu/split/src/split.rs
+++ b/src/uu/split/src/split.rs
@@ -21,7 +21,7 @@ use std::io::{stdin, BufRead, BufReader, BufWriter, ErrorKind, Read, Write};
 use std::path::Path;
 use uucore::display::Quotable;
 use uucore::error::{FromIo, UIoError, UResult, USimpleError, UUsageError};
-use uucore::parse_size::{parse_size, ParseSizeError};
+use uucore::parse_size::{parse_size_max, ParseSizeError};
 use uucore::uio_error;
 use uucore::{format_usage, help_about, help_section, help_usage};
 
@@ -419,8 +419,7 @@ impl NumberType {
         let parts: Vec<&str> = s.split('/').collect();
         match &parts[..] {
             [n_str] => {
-                let num_chunks = n_str
-                    .parse()
+                let num_chunks = parse_size_max(n_str)
                     .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?;
                 if num_chunks > 0 {
                     Ok(Self::Bytes(num_chunks))
@@ -429,32 +428,26 @@ impl NumberType {
                 }
             }
             ["l", n_str] => {
-                let num_chunks = n_str
-                    .parse()
+                let num_chunks = parse_size_max(n_str)
                     .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?;
                 Ok(Self::Lines(num_chunks))
             }
             ["l", k_str, n_str] => {
-                let num_chunks = n_str
-                    .parse()
+                let num_chunks = parse_size_max(n_str)
                     .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?;
-                let chunk_number = k_str
-                    .parse()
+                let chunk_number = parse_size_max(k_str)
                     .map_err(|_| NumberTypeError::ChunkNumber(k_str.to_string()))?;
                 Ok(Self::KthLines(chunk_number, num_chunks))
             }
             ["r", n_str] => {
-                let num_chunks = n_str
-                    .parse()
+                let num_chunks = parse_size_max(n_str)
                     .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?;
                 Ok(Self::RoundRobin(num_chunks))
             }
             ["r", k_str, n_str] => {
-                let num_chunks = n_str
-                    .parse()
+                let num_chunks = parse_size_max(n_str)
                     .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?;
-                let chunk_number = k_str
-                    .parse()
+                let chunk_number = parse_size_max(k_str)
                     .map_err(|_| NumberTypeError::ChunkNumber(k_str.to_string()))?;
                 Ok(Self::KthRoundRobin(chunk_number, num_chunks))
             }
@@ -523,7 +516,7 @@ impl Strategy {
             error: fn(ParseSizeError) -> StrategyError,
         ) -> Result<Strategy, StrategyError> {
             let s = matches.get_one::<String>(option).unwrap();
-            let n = parse_size(s).map_err(error)?;
+            let n = parse_size_max(s).map_err(error)?;
             if n > 0 {
                 Ok(strategy(n))
             } else {
@@ -542,7 +535,7 @@ impl Strategy {
             matches.value_source(OPT_NUMBER) == Some(ValueSource::CommandLine),
         ) {
             (Some(v), false, false, false, false) => {
-                let v = parse_size(v).map_err(|_| {
+                let v = parse_size_max(v).map_err(|_| {
                     StrategyError::Lines(ParseSizeError::ParseFailure(v.to_string()))
                 })?;
                 Ok(Self::Lines(v))
@@ -687,7 +680,6 @@ impl Settings {
         if additional_suffix.contains('/') {
             return Err(SettingsError::SuffixContainsSeparator(additional_suffix));
         }
-
         let strategy = Strategy::from(matches, obs_lines).map_err(SettingsError::Strategy)?;
         let (suffix_type, suffix_start) = suffix_type_from(matches)?;
         let suffix_length_str = matches.get_one::<String>(OPT_SUFFIX_LENGTH).unwrap();
diff --git a/src/uucore/src/lib/parser/parse_size.rs b/src/uucore/src/lib/parser/parse_size.rs
index 5f64afcd8a..63039e6093 100644
--- a/src/uucore/src/lib/parser/parse_size.rs
+++ b/src/uucore/src/lib/parser/parse_size.rs
@@ -7,6 +7,7 @@
 
 use std::error::Error;
 use std::fmt;
+use std::num::IntErrorKind;
 
 use crate::display::Quotable;
 
@@ -201,8 +202,10 @@ impl<'parser> Parser<'parser> {
         radix: u32,
         original_size: &str,
     ) -> Result<u64, ParseSizeError> {
-        u64::from_str_radix(numeric_string, radix)
-            .map_err(|_| ParseSizeError::ParseFailure(original_size.to_string()))
+        u64::from_str_radix(numeric_string, radix).map_err(|e| match e.kind() {
+            IntErrorKind::PosOverflow => ParseSizeError::size_too_big(original_size),
+            _ => ParseSizeError::ParseFailure(original_size.to_string()),
+        })
     }
 }
 
@@ -232,6 +235,23 @@ pub fn parse_size(size: &str) -> Result<u64, ParseSizeError> {
     Parser::default().parse(size)
 }
 
+/// Same as `parse_size()`, except returns `u64::MAX` on overflow
+/// GNU lib/coreutils include similar functionality 
+/// and GNU test suite checks this behavior for some utils
+pub fn parse_size_max(size: &str) -> Result<u64, ParseSizeError> {
+    let result = Parser::default().parse(size);
+    match result {
+        Ok(_) => result,
+        Err(error) => {
+            if let ParseSizeError::SizeTooBig(_) = error {
+                Ok(u64::MAX)
+            } else {
+                Err(error)
+            }
+        }
+    }
+}
+
 #[derive(Debug, PartialEq, Eq)]
 pub enum ParseSizeError {
     InvalidSuffix(String), // Suffix
@@ -392,6 +412,14 @@ mod tests {
         );
     }
 
+    #[test]
+    #[cfg(not(target_pointer_width = "128"))]
+    fn overflow_to_max_x64() {
+        assert_eq!(Ok(u64::MAX), parse_size_max("18446744073709551616"));
+        assert_eq!(Ok(u64::MAX), parse_size_max("10000000000000000000000"));
+        assert_eq!(Ok(u64::MAX), parse_size_max("1Y"));
+    }
+
     #[test]
     fn invalid_suffix() {
         let test_strings = ["5mib", "1eb", "1H"];

From 2ae1d8d1ccd7c4fff24b51b8d914cda2be9de006 Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Tue, 5 Sep 2023 17:13:30 -0400
Subject: [PATCH 24/36] split: missing functionality for --number option

---
 src/uu/split/src/split.rs               | 218 ++++++++++++++++++++++--
 src/uucore/src/lib/parser/parse_size.rs |   2 +-
 2 files changed, 204 insertions(+), 16 deletions(-)

diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs
index e39f6e93ff..23520f7092 100644
--- a/src/uu/split/src/split.rs
+++ b/src/uu/split/src/split.rs
@@ -19,9 +19,10 @@ use std::fs::{metadata, File};
 use std::io;
 use std::io::{stdin, BufRead, BufReader, BufWriter, ErrorKind, Read, Write};
 use std::path::Path;
+use std::u64;
 use uucore::display::Quotable;
 use uucore::error::{FromIo, UIoError, UResult, USimpleError, UUsageError};
-use uucore::parse_size::{parse_size_max, ParseSizeError};
+use uucore::parse_size::{parse_size, parse_size_max, ParseSizeError};
 use uucore::uio_error;
 use uucore::{format_usage, help_about, help_section, help_usage};
 
@@ -337,6 +338,10 @@ enum NumberType {
     /// Split into a specific number of chunks by byte.
     Bytes(u64),
 
+    /// Split into a specific number of chunks by byte
+    /// but output only the *k*th chunk.
+    KthBytes(u64, u64),
+
     /// Split into a specific number of chunks by line (approximately).
     Lines(u64),
 
@@ -349,7 +354,7 @@ enum NumberType {
 
     /// Assign lines via round-robin to the specified number of output
     /// chunks, but output only the *k*th chunk.
-    KthRoundRobin(u64, u64),
+    KthRoundRobin(u64, u64), // not yet implemented?
 }
 
 impl NumberType {
@@ -357,6 +362,7 @@ impl NumberType {
     fn num_chunks(&self) -> u64 {
         match self {
             Self::Bytes(n) => *n,
+            Self::KthBytes(_, n) => *n,
             Self::Lines(n) => *n,
             Self::KthLines(_, n) => *n,
             Self::RoundRobin(n) => *n,
@@ -375,6 +381,7 @@ enum NumberTypeError {
     ///
     /// ```ignore
     /// -n N
+    /// -n K/N
     /// -n l/N
     /// -n l/K/N
     /// -n r/N
@@ -385,9 +392,12 @@ enum NumberTypeError {
     /// The chunk number was invalid.
     ///
     /// This can happen if the value of `K` in any of the following
-    /// command-line options is not a positive integer:
+    /// command-line options is not a positive integer
+    /// or if `K` is 0
+    /// or if `K` is greater than `N`:
     ///
     /// ```ignore
+    /// -n K/N
     /// -n l/K/N
     /// -n r/K/N
     /// ```
@@ -401,6 +411,7 @@ impl NumberType {
     ///
     /// ```ignore
     /// "N"
+    /// "K/N"
     /// "l/N"
     /// "l/K/N"
     /// "r/N"
@@ -412,14 +423,17 @@ impl NumberType {
     ///
     /// # Errors
     ///
-    /// If the string is not one of the valid number types, if `K` is
-    /// not a nonnegative integer, or if `N` is not a positive
-    /// integer, then this function returns [`NumberTypeError`].
+    /// If the string is not one of the valid number types,
+    /// if `K` is not a nonnegative integer,
+    /// or if `K` is 0,
+    /// or if `N` is not a positive integer,
+    /// or if `K` is greater than `N`
+    /// then this function returns [`NumberTypeError`].
     fn from(s: &str) -> Result<Self, NumberTypeError> {
         let parts: Vec<&str> = s.split('/').collect();
         match &parts[..] {
             [n_str] => {
-                let num_chunks = parse_size_max(n_str)
+                let num_chunks = parse_size(n_str)
                     .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?;
                 if num_chunks > 0 {
                     Ok(Self::Bytes(num_chunks))
@@ -427,28 +441,44 @@ impl NumberType {
                     Err(NumberTypeError::NumberOfChunks(s.to_string()))
                 }
             }
+            [k_str, n_str] if !k_str.starts_with('l') && !k_str.starts_with('r') => {
+                let num_chunks = parse_size(n_str)
+                    .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?;
+                let chunk_number = parse_size(k_str)
+                    .map_err(|_| NumberTypeError::ChunkNumber(k_str.to_string()))?;
+                if chunk_number > num_chunks || chunk_number == 0 {
+                    return Err(NumberTypeError::ChunkNumber(k_str.to_string()));
+                }
+                Ok(Self::KthBytes(chunk_number, num_chunks))
+            }
             ["l", n_str] => {
-                let num_chunks = parse_size_max(n_str)
+                let num_chunks = parse_size(n_str)
                     .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?;
                 Ok(Self::Lines(num_chunks))
             }
             ["l", k_str, n_str] => {
-                let num_chunks = parse_size_max(n_str)
+                let num_chunks = parse_size(n_str)
                     .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?;
-                let chunk_number = parse_size_max(k_str)
+                let chunk_number = parse_size(k_str)
                     .map_err(|_| NumberTypeError::ChunkNumber(k_str.to_string()))?;
+                if chunk_number > num_chunks || chunk_number == 0 {
+                    return Err(NumberTypeError::ChunkNumber(k_str.to_string()));
+                }
                 Ok(Self::KthLines(chunk_number, num_chunks))
             }
             ["r", n_str] => {
-                let num_chunks = parse_size_max(n_str)
+                let num_chunks = parse_size(n_str)
                     .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?;
                 Ok(Self::RoundRobin(num_chunks))
             }
             ["r", k_str, n_str] => {
-                let num_chunks = parse_size_max(n_str)
+                let num_chunks = parse_size(n_str)
                     .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?;
-                let chunk_number = parse_size_max(k_str)
+                let chunk_number = parse_size(k_str)
                     .map_err(|_| NumberTypeError::ChunkNumber(k_str.to_string()))?;
+                if chunk_number > num_chunks || chunk_number == 0 {
+                    return Err(NumberTypeError::ChunkNumber(k_str.to_string()));
+                }
                 Ok(Self::KthRoundRobin(chunk_number, num_chunks))
             }
             _ => Err(NumberTypeError::NumberOfChunks(s.to_string())),
@@ -538,7 +568,13 @@ impl Strategy {
                 let v = parse_size_max(v).map_err(|_| {
                     StrategyError::Lines(ParseSizeError::ParseFailure(v.to_string()))
                 })?;
-                Ok(Self::Lines(v))
+                if v > 0 {
+                    Ok(Self::Lines(v))
+                } else {
+                    Err(StrategyError::Lines(ParseSizeError::ParseFailure(
+                        v.to_string(),
+                    )))
+                }
             }
             (None, false, false, false, false) => Ok(Self::Lines(1000)),
             (None, true, false, false, false) => {
@@ -1263,6 +1299,93 @@ where
     }
 }
 
+/// Print the k-th chunk of a file to stdout, splitting by byte.
+///
+/// This function is like [`split_into_n_chunks_by_byte`], but instead
+/// of writing each chunk to its own file, it only writes to stdout
+/// the contents of the chunk identified by `chunk_number`
+///
+/// # Errors
+///
+/// This function returns an error if there is a problem reading from
+/// `reader` or writing to stdout.
+fn kth_chunks_by_byte<R>(
+    settings: &Settings,
+    reader: &mut R,
+    chunk_number: u64,
+    num_chunks: u64,
+) -> UResult<()>
+where
+    R: BufRead,
+{
+    // Get the size of the input file in bytes and compute the number
+    // of bytes per chunk.
+    //
+    // If the requested number of chunks exceeds the number of bytes
+    // in the file - just write empty byte string to stdout
+    // NOTE: the `elide_empty_files` parameter is ignored here
+    // as we do not generate any files
+    // and instead writing to stdout
+    let metadata = metadata(&settings.input).map_err(|_| {
+        USimpleError::new(1, format!("{}: cannot determine file size", settings.input))
+    })?;
+
+    let num_bytes = metadata.len();
+    // If input file is empty and we would have written zero chunks of output,
+    // then terminate immediately.
+    // This happens on `split -e -n 3 /dev/null`, for example.
+    if num_bytes == 0 {
+        return Ok(());
+    }
+
+    // Write to stdout instead of to a file.
+    let stdout = std::io::stdout();
+    let mut writer = stdout.lock();
+
+    let chunk_size = (num_bytes / (num_chunks)).max(1);
+    let mut num_bytes: usize = num_bytes.try_into().unwrap();
+
+    let mut i = 1;
+    loop {
+        let buf: &mut Vec<u8> = &mut vec![];
+        if num_bytes > 0 {
+            // Read `chunk_size` bytes from the reader into `buf`
+            // except the last.
+            //
+            // The last chunk gets all remaining bytes so that if the number
+            // of bytes in the input file was not evenly divisible by
+            // `num_chunks`, we don't leave any bytes behind.
+            let limit = {
+                if i == num_chunks {
+                    num_bytes.try_into().unwrap()
+                } else {
+                    chunk_size
+                }
+            };
+            let n_bytes_read = reader.by_ref().take(limit).read_to_end(buf);
+            match n_bytes_read {
+                Ok(n_bytes) => {
+                    num_bytes -= n_bytes;
+                }
+                Err(error) => {
+                    return Err(USimpleError::new(
+                        1,
+                        format!("{}: cannot read from input : {}", settings.input, error),
+                    ));
+                }
+            }
+            if i == chunk_number {
+                writer.write_all(buf)?;
+                break;
+            }
+            i += 1;
+        } else {
+            break;
+        }
+    }
+    Ok(())
+}
+
 /// Split a file into a specific number of chunks by line.
 ///
 /// This function always creates one output file for each chunk, even
@@ -1438,6 +1561,50 @@ where
     Ok(())
 }
 
+/// Print the k-th chunk of a file, splitting by line, but
+/// assign lines via round-robin to the specified number of output
+/// chunks, but output only the *k*th chunk.
+///
+/// This function is like [`kth_chunk_by_line`], as it only writes to stdout and
+/// prints out only *k*th chunk
+/// It is also like [`split_into_n_chunks_by_line_round_robin`], as it is assigning chunks
+/// using round robin distribution
+///
+/// # Errors
+///
+/// This function returns an error if there is a problem reading from
+/// `reader` or writing to one of the output files.
+///
+/// # See also
+///
+/// * [`split_into_n_chunks_by_line_round_robin`], which splits its input in the
+///   same way, but writes each chunk to its own file.
+fn kth_chunk_by_line_round_robin<R>(
+    _settings: &Settings,
+    reader: &mut R,
+    chunk_number: u64,
+    num_chunks: u64,
+) -> UResult<()>
+where
+    R: BufRead,
+{
+    // Write to stdout instead of to a file.
+    let stdout = std::io::stdout();
+    let mut writer = stdout.lock();
+
+    let num_chunks: usize = num_chunks.try_into().unwrap();
+    let chunk_number: usize = chunk_number.try_into().unwrap();
+    for (i, line_result) in reader.lines().enumerate() {
+        let line = line_result?;
+        let bytes = line.as_bytes();
+        if (i % num_chunks) == chunk_number {
+            writer.write_all(bytes)?;
+            writer.write_all(b"\n")?;
+        }
+    }
+    Ok(())
+}
+
 fn split(settings: &Settings) -> UResult<()> {
     let mut reader = BufReader::new(if settings.input == "-" {
         Box::new(stdin()) as Box<dyn Read>
@@ -1455,6 +1622,9 @@ fn split(settings: &Settings) -> UResult<()> {
         Strategy::Number(NumberType::Bytes(num_chunks)) => {
             split_into_n_chunks_by_byte(settings, &mut reader, num_chunks)
         }
+        Strategy::Number(NumberType::KthBytes(chunk_number, num_chunks)) => {
+            kth_chunks_by_byte(settings, &mut reader, chunk_number, num_chunks)
+        }
         Strategy::Number(NumberType::Lines(num_chunks)) => {
             split_into_n_chunks_by_line(settings, &mut reader, num_chunks)
         }
@@ -1467,7 +1637,12 @@ fn split(settings: &Settings) -> UResult<()> {
         Strategy::Number(NumberType::RoundRobin(num_chunks)) => {
             split_into_n_chunks_by_line_round_robin(settings, &mut reader, num_chunks)
         }
-        Strategy::Number(_) => Err(USimpleError::new(1, "-n mode not yet fully implemented")),
+        Strategy::Number(NumberType::KthRoundRobin(chunk_number, num_chunks)) => {
+            // The chunk number is given as a 1-indexed number, but it
+            // is a little easier to deal with a 0-indexed number.
+            let chunk_number = chunk_number - 1;
+            kth_chunk_by_line_round_robin(settings, &mut reader, chunk_number, num_chunks)
+        }
         Strategy::Lines(chunk_size) => {
             let mut writer = LineChunkWriter::new(chunk_size, settings)?;
             match std::io::copy(&mut reader, &mut writer) {
@@ -1570,6 +1745,18 @@ mod tests {
             NumberType::from("l/abc/456").unwrap_err(),
             NumberTypeError::ChunkNumber("abc".to_string())
         );
+        assert_eq!(
+            NumberType::from("l/456/123").unwrap_err(),
+            NumberTypeError::ChunkNumber("456".to_string())
+        );
+        assert_eq!(
+            NumberType::from("r/456/123").unwrap_err(),
+            NumberTypeError::ChunkNumber("456".to_string())
+        );
+        assert_eq!(
+            NumberType::from("456/123").unwrap_err(),
+            NumberTypeError::ChunkNumber("456".to_string())
+        );
         // In GNU split, the number of chunks get precedence:
         //
         //     $ split -n l/abc/xyz
@@ -1605,6 +1792,7 @@ mod tests {
     #[test]
     fn test_number_type_num_chunks() {
         assert_eq!(NumberType::from("123").unwrap().num_chunks(), 123);
+        assert_eq!(NumberType::from("123/456").unwrap().num_chunks(), 456);
         assert_eq!(NumberType::from("l/123").unwrap().num_chunks(), 123);
         assert_eq!(NumberType::from("l/123/456").unwrap().num_chunks(), 456);
         assert_eq!(NumberType::from("r/123").unwrap().num_chunks(), 123);
diff --git a/src/uucore/src/lib/parser/parse_size.rs b/src/uucore/src/lib/parser/parse_size.rs
index 63039e6093..4d9968bb73 100644
--- a/src/uucore/src/lib/parser/parse_size.rs
+++ b/src/uucore/src/lib/parser/parse_size.rs
@@ -236,7 +236,7 @@ pub fn parse_size(size: &str) -> Result<u64, ParseSizeError> {
 }
 
 /// Same as `parse_size()`, except returns `u64::MAX` on overflow
-/// GNU lib/coreutils include similar functionality 
+/// GNU lib/coreutils include similar functionality
 /// and GNU test suite checks this behavior for some utils
 pub fn parse_size_max(size: &str) -> Result<u64, ParseSizeError> {
     let result = Parser::default().parse(size);

From 2a0b4f88370f1ad787937d174b0ca8e6c2e4bf10 Mon Sep 17 00:00:00 2001
From: Yury Zhytkou <54360928+zhitkoff@users.noreply.github.com>
Date: Tue, 5 Sep 2023 18:14:14 -0400
Subject: [PATCH 25/36] Update build-gnu.sh

---
 util/build-gnu.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/util/build-gnu.sh b/util/build-gnu.sh
index b57b4fe769..2d949bbb33 100755
--- a/util/build-gnu.sh
+++ b/util/build-gnu.sh
@@ -19,7 +19,6 @@ path_GNU="$(readlink -fm -- "${path_GNU:-${path_UUTILS}/../gnu}")"
 ###
 
 # On MacOS there is no system /usr/bin/timeout
-
 # and trying to add it to /usr/bin (with symlink of copy binary) will fail unless system integrity protection is disabled (not ideal)
 # ref: https://support.apple.com/en-us/102149
 # On MacOS the Homebrew coreutils could be installed and then "sudo ln -s /opt/homebrew/bin/timeout /usr/local/bin/timeout"

From a0a9ee6491ca3402e9fdc3af5b7b3f5def3b8268 Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Tue, 5 Sep 2023 18:38:22 -0400
Subject: [PATCH 26/36] split: fixing tests for parse_size_max()

---
 tests/by-util/test_split.rs | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs
index 053b6f8bf1..2d6474511f 100644
--- a/tests/by-util/test_split.rs
+++ b/tests/by-util/test_split.rs
@@ -608,14 +608,6 @@ fn test_split_invalid_bytes_size() {
         .fails()
         .code_is(1)
         .stderr_only("split: invalid number of bytes: '1024R'\n");
-    #[cfg(not(target_pointer_width = "128"))]
-    new_ucmd!()
-        .args(&["-b", "1Y"])
-        .fails()
-        .code_is(1)
-        .stderr_only(
-            "split: invalid number of bytes: '1Y': Value too large for defined data type\n",
-        );
     #[cfg(target_pointer_width = "32")]
     {
         let sizes = ["1000G", "10T"];
@@ -625,6 +617,18 @@ fn test_split_invalid_bytes_size() {
     }
 }
 
+#[test]
+fn test_split_overflow_bytes_size() {
+    #[cfg(not(target_pointer_width = "128"))]
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "test_split_overflow_bytes_size";
+    RandomFile::new(&at, name).add_bytes(1000);
+    ucmd.args(&["-b", "1Y", name]).succeeds();
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
+    assert_eq!(glob.count(), 1);
+    assert_eq!(glob.collate(), at.read_bytes(name));
+}
+
 #[test]
 fn test_split_chunks_num_chunks_oversized_32() {
     #[cfg(target_pointer_width = "32")]

From eaae32ec3bfbf842933b706001368314ee221851 Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Tue, 5 Sep 2023 20:12:25 -0400
Subject: [PATCH 27/36] split: comments

---
 src/uu/split/src/split.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs
index 23520f7092..6fb05dabb5 100644
--- a/src/uu/split/src/split.rs
+++ b/src/uu/split/src/split.rs
@@ -354,7 +354,7 @@ enum NumberType {
 
     /// Assign lines via round-robin to the specified number of output
     /// chunks, but output only the *k*th chunk.
-    KthRoundRobin(u64, u64), // not yet implemented?
+    KthRoundRobin(u64, u64),
 }
 
 impl NumberType {

From d8a16a235149ab60099c93c889437314095426fb Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Wed, 6 Sep 2023 12:42:49 -0400
Subject: [PATCH 28/36] split: tests

---
 tests/by-util/test_split.rs | 99 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 98 insertions(+), 1 deletion(-)

diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs
index 2d6474511f..da4a4475be 100644
--- a/tests/by-util/test_split.rs
+++ b/tests/by-util/test_split.rs
@@ -377,6 +377,22 @@ fn test_split_obs_lines_standalone() {
     let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
     assert_eq!(glob.count(), 2);
     assert_eq!(glob.collate(), at.read_bytes(name));
+    ucmd.args(&["-99999999999999999991", name]).succeeds().no_stderr().no_stdout();
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
+    assert_eq!(glob.count(), 2);
+    assert_eq!(glob.collate(), at.read_bytes(name));
+}
+
+/// Test for obsolete lines option standalone overflow
+#[test]
+fn test_split_obs_lines_standalone_overflow() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "obs-lines-standalone";
+    RandomFile::new(&at, name).add_lines(4);
+    ucmd.args(&["-99999999999999999991", name]).succeeds().no_stderr().no_stdout();
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
+    assert_eq!(glob.count(), 1);
+    assert_eq!(glob.collate(), at.read_bytes(name));
 }
 
 /// Test for obsolete lines option as part of invalid combined short options
@@ -756,7 +772,7 @@ creating file 'xaf'
 }
 
 #[test]
-fn test_number() {
+fn test_number_n() {
     let (at, mut ucmd) = at_and_ucmd!();
     let file_read = |f| {
         let mut s = String::new();
@@ -771,6 +787,80 @@ fn test_number() {
     assert_eq!(file_read("xae"), "uvwxyz\n");
 }
 
+#[test]
+fn test_number_kth_of_n() {
+    new_ucmd!()
+        .args(&["--number=3/5", "asciilowercase.txt"])
+        .succeeds()
+        .stdout_only("klmno");
+    new_ucmd!()
+        .args(&["-e", "--number=99/100", "asciilowercase.txt"])
+        .succeeds()
+        .stdout_only("");
+    new_ucmd!()
+        .args(&[
+            "--number=r/9223372036854775807/18446744073709551615",
+            "asciilowercase.txt",
+        ])
+        .succeeds()
+        .stdout_only("");
+    new_ucmd!()
+        .args(&["--number=0/5", "asciilowercase.txt"])
+        .fails()
+        .stderr_contains("split: invalid chunk number: 0");
+    new_ucmd!()
+        .args(&["--number=10/5", "asciilowercase.txt"])
+        .fails()
+        .stderr_contains("split: invalid chunk number: 10");
+    new_ucmd!()
+        .args(&[
+            "--number=9223372036854775807/18446744073709551616",
+            "asciilowercase.txt",
+        ])
+        .fails()
+        .stderr_contains("split: invalid number of chunks: 18446744073709551616");
+}
+
+#[test]
+fn test_number_kth_of_n_round_robin() {
+    new_ucmd!()
+        .args(&["--number", "r/2/3", "fivelines.txt"])
+        .succeeds()
+        .stdout_only("2\n5\n");
+    new_ucmd!()
+        .args(&["--number", "r/1/4", "fivelines.txt"])
+        .succeeds()
+        .stdout_only("1\n5\n");
+    new_ucmd!()
+        .args(&["-e", "--number", "r/7/7", "fivelines.txt"])
+        .succeeds()
+        .stdout_only("");
+    new_ucmd!()
+        .args(&[
+            "--number",
+            "r/9223372036854775807/18446744073709551615",
+            "fivelines.txt",
+        ])
+        .succeeds()
+        .stdout_only("");
+    new_ucmd!()
+        .args(&[
+            "--number",
+            "r/9223372036854775807/18446744073709551616",
+            "fivelines.txt",
+        ])
+        .fails()
+        .stderr_contains("split: invalid number of chunks: 18446744073709551616");
+    new_ucmd!()
+        .args(&["--number", "r/0/3", "fivelines.txt"])
+        .fails()
+        .stderr_contains("split: invalid chunk number: 0");
+    new_ucmd!()
+        .args(&["--number", "r/10/3", "fivelines.txt"])
+        .fails()
+        .stderr_contains("split: invalid chunk number: 10");
+}
+
 #[test]
 fn test_split_number_with_io_blksize() {
     let (at, mut ucmd) = at_and_ucmd!();
@@ -927,6 +1017,13 @@ fn test_line_bytes() {
     assert_eq!(at.read("xad"), "ee\n");
 }
 
+#[test]
+fn test_line_bytes_overflow() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["-C", "18446744073709551616", "letters.txt"]).succeeds();
+    assert_eq!(at.read("xaa"), "aaaaaaaaa\nbbbb\ncccc\ndd\nee\n");
+}
+
 #[test]
 fn test_line_bytes_concatenated_with_value() {
     let (at, mut ucmd) = at_and_ucmd!();

From e378454a265b103b98e940d5ed9b691985433f54 Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Wed, 6 Sep 2023 13:15:35 -0400
Subject: [PATCH 29/36] split: formatting

---
 tests/by-util/test_split.rs | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs
index da4a4475be..3ea74606ca 100644
--- a/tests/by-util/test_split.rs
+++ b/tests/by-util/test_split.rs
@@ -377,7 +377,10 @@ fn test_split_obs_lines_standalone() {
     let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
     assert_eq!(glob.count(), 2);
     assert_eq!(glob.collate(), at.read_bytes(name));
-    ucmd.args(&["-99999999999999999991", name]).succeeds().no_stderr().no_stdout();
+    ucmd.args(&["-99999999999999999991", name])
+        .succeeds()
+        .no_stderr()
+        .no_stdout();
     let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
     assert_eq!(glob.count(), 2);
     assert_eq!(glob.collate(), at.read_bytes(name));
@@ -389,7 +392,10 @@ fn test_split_obs_lines_standalone_overflow() {
     let (at, mut ucmd) = at_and_ucmd!();
     let name = "obs-lines-standalone";
     RandomFile::new(&at, name).add_lines(4);
-    ucmd.args(&["-99999999999999999991", name]).succeeds().no_stderr().no_stdout();
+    ucmd.args(&["-99999999999999999991", name])
+        .succeeds()
+        .no_stderr()
+        .no_stdout();
     let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
     assert_eq!(glob.count(), 1);
     assert_eq!(glob.collate(), at.read_bytes(name));
@@ -1020,7 +1026,8 @@ fn test_line_bytes() {
 #[test]
 fn test_line_bytes_overflow() {
     let (at, mut ucmd) = at_and_ucmd!();
-    ucmd.args(&["-C", "18446744073709551616", "letters.txt"]).succeeds();
+    ucmd.args(&["-C", "18446744073709551616", "letters.txt"])
+        .succeeds();
     assert_eq!(at.read("xaa"), "aaaaaaaaa\nbbbb\ncccc\ndd\nee\n");
 }
 

From 4fd598e4d513d3fec00876e3c897c7bd6024508f Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Wed, 6 Sep 2023 13:20:58 -0400
Subject: [PATCH 30/36] split: tests

---
 tests/by-util/test_split.rs | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs
index 3ea74606ca..c3e8445a62 100644
--- a/tests/by-util/test_split.rs
+++ b/tests/by-util/test_split.rs
@@ -377,13 +377,6 @@ fn test_split_obs_lines_standalone() {
     let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
     assert_eq!(glob.count(), 2);
     assert_eq!(glob.collate(), at.read_bytes(name));
-    ucmd.args(&["-99999999999999999991", name])
-        .succeeds()
-        .no_stderr()
-        .no_stdout();
-    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
-    assert_eq!(glob.count(), 2);
-    assert_eq!(glob.collate(), at.read_bytes(name));
 }
 
 /// Test for obsolete lines option standalone overflow

From 1669a92694985c8e2ad3df661fe457731698d172 Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Wed, 6 Sep 2023 14:02:08 -0400
Subject: [PATCH 31/36] split: tests overflow

---
 tests/by-util/test_split.rs | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs
index c3e8445a62..849fdc7cfe 100644
--- a/tests/by-util/test_split.rs
+++ b/tests/by-util/test_split.rs
@@ -2,7 +2,7 @@
 //
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
-// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes onehundredlines nbbbb dxen
+// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes onehundredlines nbbbb dxen ncccc
 
 use crate::common::util::{AtPath, TestScenario};
 use rand::{thread_rng, Rng, SeedableRng};
@@ -796,6 +796,7 @@ fn test_number_kth_of_n() {
         .args(&["-e", "--number=99/100", "asciilowercase.txt"])
         .succeeds()
         .stdout_only("");
+    #[cfg(target_pointer_width = "64")]
     new_ucmd!()
         .args(&[
             "--number=r/9223372036854775807/18446744073709551615",
@@ -811,6 +812,7 @@ fn test_number_kth_of_n() {
         .args(&["--number=10/5", "asciilowercase.txt"])
         .fails()
         .stderr_contains("split: invalid chunk number: 10");
+    #[cfg(target_pointer_width = "64")]
     new_ucmd!()
         .args(&[
             "--number=9223372036854775807/18446744073709551616",
@@ -834,6 +836,7 @@ fn test_number_kth_of_n_round_robin() {
         .args(&["-e", "--number", "r/7/7", "fivelines.txt"])
         .succeeds()
         .stdout_only("");
+    #[cfg(target_pointer_width = "64")]
     new_ucmd!()
         .args(&[
             "--number",
@@ -842,6 +845,7 @@ fn test_number_kth_of_n_round_robin() {
         ])
         .succeeds()
         .stdout_only("");
+    #[cfg(target_pointer_width = "64")]
     new_ucmd!()
         .args(&[
             "--number",
@@ -1018,6 +1022,7 @@ fn test_line_bytes() {
 
 #[test]
 fn test_line_bytes_overflow() {
+    #[cfg(target_pointer_width = "64")]
     let (at, mut ucmd) = at_and_ucmd!();
     ucmd.args(&["-C", "18446744073709551616", "letters.txt"])
         .succeeds();

From fbf5ac4329b83c8c6e6ffb3f87ab7c2fea09e263 Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Wed, 6 Sep 2023 14:16:21 -0400
Subject: [PATCH 32/36] split: tests 32bit

---
 tests/by-util/test_split.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs
index 849fdc7cfe..247b3cc578 100644
--- a/tests/by-util/test_split.rs
+++ b/tests/by-util/test_split.rs
@@ -1021,8 +1021,8 @@ fn test_line_bytes() {
 }
 
 #[test]
+#[cfg(target_pointer_width = "64")]
 fn test_line_bytes_overflow() {
-    #[cfg(target_pointer_width = "64")]
     let (at, mut ucmd) = at_and_ucmd!();
     ucmd.args(&["-C", "18446744073709551616", "letters.txt"])
         .succeeds();

From e40e88702270930470d99301118e03eaba37ca6d Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Wed, 6 Sep 2023 18:43:20 -0400
Subject: [PATCH 33/36] split: some refactoring for handle_obsolete()

---
 src/uu/split/src/split.rs | 241 +++++++++++++++++++++++---------------
 1 file changed, 146 insertions(+), 95 deletions(-)

diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs
index 6fb05dabb5..65032dfd9e 100644
--- a/src/uu/split/src/split.rs
+++ b/src/uu/split/src/split.rs
@@ -75,104 +75,152 @@ fn handle_obsolete(args: impl uucore::Args) -> (Vec<OsString>, Option<String>) {
     let mut obs_lines = None;
     let mut preceding_long_opt_req_value = false;
     let mut preceding_short_opt_req_value = false;
+
     let filtered_args = args
         .filter_map(|os_slice| {
-            let filter: Option<OsString>;
-            if let Some(slice) = os_slice.to_str() {
-                // check if the slice is a true short option (and not hyphen prefixed value of an option)
-                // and if so, a short option that can contain obsolete lines value
-                if slice.starts_with('-')
-                    && !slice.starts_with("--")
-                    && !preceding_long_opt_req_value
-                    && !preceding_short_opt_req_value
-                    && !slice.starts_with("-a")
-                    && !slice.starts_with("-b")
-                    && !slice.starts_with("-C")
-                    && !slice.starts_with("-l")
-                    && !slice.starts_with("-n")
-                {
-                    // start of the short option string
-                    // that can have obsolete lines option value in it
-                    // extract numeric part and filter it out
-                    let mut obs_lines_extracted: Vec<char> = vec![];
-                    let mut obs_lines_end_reached = false;
-                    let filtered_slice: Vec<char> = slice
-                        .chars()
-                        .filter(|c| {
-                            // To correctly process scenario like '-x200a4'
-                            // we need to stop extracting digits once alphabetic character is encountered
-                            // after we already have something in obs_lines_extracted
-                            if c.is_ascii_digit() && !obs_lines_end_reached {
-                                obs_lines_extracted.push(*c);
-                                false
-                            } else {
-                                if !obs_lines_extracted.is_empty() {
-                                    obs_lines_end_reached = true;
-                                }
-                                true
-                            }
-                        })
-                        .collect();
-
-                    if obs_lines_extracted.is_empty() {
-                        // no obsolete lines value found/extracted
-                        filter = Some(OsString::from(slice));
-                    } else {
-                        // obsolete lines value was extracted
-                        obs_lines = Some(obs_lines_extracted.iter().collect());
-                        if filtered_slice.get(1).is_some() {
-                            // there were some short options in front of or after obsolete lines value
-                            // i.e. '-xd100' or '-100de' or similar, which after extraction of obsolete lines value
-                            // would look like '-xd' or '-de' or similar
-                            let filtered_slice: String = filtered_slice.iter().collect();
-                            filter = Some(OsString::from(filtered_slice));
-                        } else {
-                            filter = None;
-                        }
-                    }
-                } else {
-                    // either not a short option
-                    // or a short option that cannot have obsolete lines value in it
-                    filter = Some(OsString::from(slice));
-                }
-                // capture if current slice is a preceding long option that requires value and does not use '=' to assign that value
-                // following slice should be treaded as value for this option
-                // even if it starts with '-' (which would be treated as hyphen prefixed value)
-                if slice.starts_with("--") {
-                    preceding_long_opt_req_value = &slice[2..] == OPT_BYTES
-                        || &slice[2..] == OPT_LINE_BYTES
-                        || &slice[2..] == OPT_LINES
-                        || &slice[2..] == OPT_ADDITIONAL_SUFFIX
-                        || &slice[2..] == OPT_FILTER
-                        || &slice[2..] == OPT_NUMBER
-                        || &slice[2..] == OPT_SUFFIX_LENGTH;
-                }
-                // capture if current slice is a preceding short option that requires value and does not have value in the same slice (value separated by whitespace)
-                // following slice should be treaded as value for this option
-                // even if it starts with '-' (which would be treated as hyphen prefixed value)
-                preceding_short_opt_req_value = slice == "-b"
-                    || slice == "-C"
-                    || slice == "-l"
-                    || slice == "-n"
-                    || slice == "-a";
-                // slice is a value
-                // reset preceding option flags
-                if !slice.starts_with('-') {
-                    preceding_short_opt_req_value = false;
-                    preceding_long_opt_req_value = false;
-                }
+            filter_args(
+                os_slice,
+                &mut obs_lines,
+                &mut preceding_long_opt_req_value,
+                &mut preceding_short_opt_req_value,
+            )
+        })
+        .collect();
+
+    (filtered_args, obs_lines)
+}
+
+/// Helper function to [`handle_obsolete`]
+/// Filters out obsolete lines option from args
+fn filter_args(
+    os_slice: OsString,
+    obs_lines: &mut Option<String>,
+    preceding_long_opt_req_value: &mut bool,
+    preceding_short_opt_req_value: &mut bool,
+) -> Option<OsString> {
+    let filter: Option<OsString>;
+    if let Some(slice) = os_slice.to_str() {
+        if should_extract_obs_lines(
+            slice,
+            preceding_long_opt_req_value,
+            preceding_short_opt_req_value,
+        ) {
+            // start of the short option string
+            // that can have obsolete lines option value in it
+            filter = handle_extract_obs_lines(slice, obs_lines);
+        } else {
+            // either not a short option
+            // or a short option that cannot have obsolete lines value in it
+            filter = Some(OsString::from(slice));
+        }
+        handle_preceding_options(
+            slice,
+            preceding_long_opt_req_value,
+            preceding_short_opt_req_value,
+        );
+    } else {
+        // Cannot cleanly convert os_slice to UTF-8
+        // Do not process and return as-is
+        // This will cause failure later on, but we should not handle it here
+        // and let clap panic on invalid UTF-8 argument
+        filter = Some(os_slice);
+    }
+    filter
+}
+
+/// Helper function to [`filter_args`]
+/// Checks if the slice is a true short option (and not hyphen prefixed value of an option)
+/// and if so, a short option that can contain obsolete lines value
+fn should_extract_obs_lines(
+    slice: &str,
+    preceding_long_opt_req_value: &bool,
+    preceding_short_opt_req_value: &bool,
+) -> bool {
+    slice.starts_with('-')
+        && !slice.starts_with("--")
+        && !preceding_long_opt_req_value
+        && !preceding_short_opt_req_value
+        && !slice.starts_with("-a")
+        && !slice.starts_with("-b")
+        && !slice.starts_with("-C")
+        && !slice.starts_with("-l")
+        && !slice.starts_with("-n")
+}
+
+/// Helper function to [`filter_args`]
+/// Extracts obsolete lines numeric part from argument slice
+/// and filters it out
+fn handle_extract_obs_lines(slice: &str, obs_lines: &mut Option<String>) -> Option<OsString> {
+    let mut obs_lines_extracted: Vec<char> = vec![];
+    let mut obs_lines_end_reached = false;
+    let filtered_slice: Vec<char> = slice
+        .chars()
+        .filter(|c| {
+            // To correctly process scenario like '-x200a4'
+            // we need to stop extracting digits once alphabetic character is encountered
+            // after we already have something in obs_lines_extracted
+            if c.is_ascii_digit() && !obs_lines_end_reached {
+                obs_lines_extracted.push(*c);
+                false
             } else {
-                // Cannot cleanly convert os_slice to UTF-8
-                // Do not process and return as-is
-                // This will cause failure later on, but we should not handle it here
-                // and let clap panic on invalid UTF-8 argument
-                filter = Some(os_slice);
+                if !obs_lines_extracted.is_empty() {
+                    obs_lines_end_reached = true;
+                }
+                true
             }
-            // return filter
-            filter
         })
         .collect();
-    (filtered_args, obs_lines)
+
+    if obs_lines_extracted.is_empty() {
+        // no obsolete lines value found/extracted
+        Some(OsString::from(slice))
+    } else {
+        // obsolete lines value was extracted
+        let extracted: String = obs_lines_extracted.iter().collect();
+        *obs_lines = Some(extracted);
+        if filtered_slice.get(1).is_some() {
+            // there were some short options in front of or after obsolete lines value
+            // i.e. '-xd100' or '-100de' or similar, which after extraction of obsolete lines value
+            // would look like '-xd' or '-de' or similar
+            let filtered_slice: String = filtered_slice.iter().collect();
+            Some(OsString::from(filtered_slice))
+        } else {
+            None
+        }
+    }
+}
+
+/// Helper function to [`handle_extract_obs_lines`]
+/// Captures if current slice is a preceding option
+/// that requires value
+fn handle_preceding_options(
+    slice: &str,
+    preceding_long_opt_req_value: &mut bool,
+    preceding_short_opt_req_value: &mut bool,
+) {
+    // capture if current slice is a preceding long option that requires value and does not use '=' to assign that value
+    // following slice should be treaded as value for this option
+    // even if it starts with '-' (which would be treated as hyphen prefixed value)
+    if slice.starts_with("--") {
+        *preceding_long_opt_req_value = &slice[2..] == OPT_BYTES
+            || &slice[2..] == OPT_LINE_BYTES
+            || &slice[2..] == OPT_LINES
+            || &slice[2..] == OPT_ADDITIONAL_SUFFIX
+            || &slice[2..] == OPT_FILTER
+            || &slice[2..] == OPT_NUMBER
+            || &slice[2..] == OPT_SUFFIX_LENGTH;
+    }
+    // capture if current slice is a preceding short option that requires value and does not have value in the same slice (value separated by whitespace)
+    // following slice should be treaded as value for this option
+    // even if it starts with '-' (which would be treated as hyphen prefixed value)
+    *preceding_short_opt_req_value =
+        slice == "-b" || slice == "-C" || slice == "-l" || slice == "-n" || slice == "-a";
+    // slice is a value
+    // reset preceding option flags
+    if !slice.starts_with('-') {
+        *preceding_short_opt_req_value = false;
+        *preceding_long_opt_req_value = false;
+    }
 }
 
 pub fn uu_app() -> Command {
@@ -430,6 +478,9 @@ impl NumberType {
     /// or if `K` is greater than `N`
     /// then this function returns [`NumberTypeError`].
     fn from(s: &str) -> Result<Self, NumberTypeError> {
+        fn is_invalid_chunk(chunk_number: u64, num_chunks: u64) -> bool {
+            chunk_number > num_chunks || chunk_number == 0
+        }
         let parts: Vec<&str> = s.split('/').collect();
         match &parts[..] {
             [n_str] => {
@@ -446,7 +497,7 @@ impl NumberType {
                     .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?;
                 let chunk_number = parse_size(k_str)
                     .map_err(|_| NumberTypeError::ChunkNumber(k_str.to_string()))?;
-                if chunk_number > num_chunks || chunk_number == 0 {
+                if is_invalid_chunk(chunk_number, num_chunks) {
                     return Err(NumberTypeError::ChunkNumber(k_str.to_string()));
                 }
                 Ok(Self::KthBytes(chunk_number, num_chunks))
@@ -461,7 +512,7 @@ impl NumberType {
                     .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?;
                 let chunk_number = parse_size(k_str)
                     .map_err(|_| NumberTypeError::ChunkNumber(k_str.to_string()))?;
-                if chunk_number > num_chunks || chunk_number == 0 {
+                if is_invalid_chunk(chunk_number, num_chunks) {
                     return Err(NumberTypeError::ChunkNumber(k_str.to_string()));
                 }
                 Ok(Self::KthLines(chunk_number, num_chunks))
@@ -476,7 +527,7 @@ impl NumberType {
                     .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?;
                 let chunk_number = parse_size(k_str)
                     .map_err(|_| NumberTypeError::ChunkNumber(k_str.to_string()))?;
-                if chunk_number > num_chunks || chunk_number == 0 {
+                if is_invalid_chunk(chunk_number, num_chunks) {
                     return Err(NumberTypeError::ChunkNumber(k_str.to_string()));
                 }
                 Ok(Self::KthRoundRobin(chunk_number, num_chunks))

From 3be284e0d94eb0c8d61e7ab684a9ff1bec5a4196 Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Wed, 6 Sep 2023 19:49:26 -0400
Subject: [PATCH 34/36] split: more test coverage

---
 src/uu/split/src/split.rs   |  2 +-
 tests/by-util/test_split.rs | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs
index 65032dfd9e..7762789ac6 100644
--- a/src/uu/split/src/split.rs
+++ b/src/uu/split/src/split.rs
@@ -1295,7 +1295,7 @@ where
     // If we would have written zero chunks of output, then terminate
     // immediately. This happens on `split -e -n 3 /dev/null`, for
     // example.
-    if num_chunks == 0 {
+    if num_chunks == 0 || num_bytes == 0 {
         return Ok(());
     }
 
diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs
index 247b3cc578..bbac3c7392 100644
--- a/tests/by-util/test_split.rs
+++ b/tests/by-util/test_split.rs
@@ -340,6 +340,18 @@ fn test_split_lines_number() {
         .succeeds()
         .no_stderr()
         .no_stdout();
+    scene
+        .ucmd()
+        .args(&["--lines", "0", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_only("split: invalid number of lines: 0\n");
+    scene
+        .ucmd()
+        .args(&["-0", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_only("split: invalid number of lines: 0\n");
     scene
         .ucmd()
         .args(&["--lines", "2fb", "file"])
@@ -669,6 +681,15 @@ fn test_split_stdin_num_chunks() {
         .stderr_only("split: -: cannot determine file size\n");
 }
 
+#[test]
+fn test_split_stdin_num_kth_chunk() {
+    new_ucmd!()
+        .args(&["--number=1/2"])
+        .fails()
+        .code_is(1)
+        .stderr_only("split: -: cannot determine file size\n");
+}
+
 fn file_read(at: &AtPath, filename: &str) -> String {
     let mut s = String::new();
     at.open(filename).read_to_string(&mut s).unwrap();
@@ -784,6 +805,10 @@ fn test_number_n() {
     assert_eq!(file_read("xac"), "klmno");
     assert_eq!(file_read("xad"), "pqrst");
     assert_eq!(file_read("xae"), "uvwxyz\n");
+    new_ucmd!()
+        .args(&["--number=100", "/dev/null"])
+        .succeeds()
+        .stdout_only("");
 }
 
 #[test]
@@ -792,10 +817,18 @@ fn test_number_kth_of_n() {
         .args(&["--number=3/5", "asciilowercase.txt"])
         .succeeds()
         .stdout_only("klmno");
+    new_ucmd!()
+        .args(&["--number=5/5", "asciilowercase.txt"])
+        .succeeds()
+        .stdout_only("uvwxyz\n");
     new_ucmd!()
         .args(&["-e", "--number=99/100", "asciilowercase.txt"])
         .succeeds()
         .stdout_only("");
+    new_ucmd!()
+        .args(&["--number=3/10", "/dev/null"])
+        .succeeds()
+        .stdout_only("");
     #[cfg(target_pointer_width = "64")]
     new_ucmd!()
         .args(&[

From 8883f016d4ffd9929949130ab724a20b1c3cb132 Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Wed, 6 Sep 2023 20:09:26 -0400
Subject: [PATCH 35/36] split: fix windows tests

---
 tests/by-util/test_split.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs
index bbac3c7392..c204a6238d 100644
--- a/tests/by-util/test_split.rs
+++ b/tests/by-util/test_split.rs
@@ -805,6 +805,7 @@ fn test_number_n() {
     assert_eq!(file_read("xac"), "klmno");
     assert_eq!(file_read("xad"), "pqrst");
     assert_eq!(file_read("xae"), "uvwxyz\n");
+    #[cfg(unix)]
     new_ucmd!()
         .args(&["--number=100", "/dev/null"])
         .succeeds()
@@ -825,6 +826,7 @@ fn test_number_kth_of_n() {
         .args(&["-e", "--number=99/100", "asciilowercase.txt"])
         .succeeds()
         .stdout_only("");
+    #[cfg(unix)]
     new_ucmd!()
         .args(&["--number=3/10", "/dev/null"])
         .succeeds()

From 3f065eed8a265e24ea8790a8680dd62410c5f024 Mon Sep 17 00:00:00 2001
From: zhitkoff <zhitkoff@gmail.com>
Date: Wed, 6 Sep 2023 21:04:01 -0400
Subject: [PATCH 36/36] split: fixing test for 32bit

---
 tests/by-util/test_split.rs | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs
index c204a6238d..5eba5071f6 100644
--- a/tests/by-util/test_split.rs
+++ b/tests/by-util/test_split.rs
@@ -657,19 +657,17 @@ fn test_split_overflow_bytes_size() {
 }
 
 #[test]
+#[cfg(target_pointer_width = "32")]
 fn test_split_chunks_num_chunks_oversized_32() {
-    #[cfg(target_pointer_width = "32")]
-    {
-        let scene = TestScenario::new(util_name!());
-        let at = &scene.fixtures;
-        at.touch("file");
-        scene
-            .ucmd()
-            .args(&["--number", "5000000000", "file"])
-            .fails()
-            .code_is(1)
-            .stderr_only("split: Number of chunks too big\n");
-    }
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    at.touch("file");
+    scene
+        .ucmd()
+        .args(&["--number", "5000000000", "sixhundredfiftyonebytes.txt"])
+        .fails()
+        .code_is(1)
+        .stderr_only("split: Number of chunks too big\n");
 }
 
 #[test]