From 0be233f27d0b3d2572c156244dbfc3defbeac98c Mon Sep 17 00:00:00 2001
From: Jonathan Langlois <jlanglois@lunit.io>
Date: Thu, 24 Aug 2023 09:24:05 +0900
Subject: [PATCH 1/2] fix: avoid copying files with same name prefix

---
 gcsfs/core.py            | 15 +++------------
 gcsfs/tests/test_core.py |  7 ++++---
 2 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/gcsfs/core.py b/gcsfs/core.py
index e8098bf3..a8364f91 100644
--- a/gcsfs/core.py
+++ b/gcsfs/core.py
@@ -835,18 +835,6 @@ async def _info(self, path, generation=None, **kwargs):
         else:
             raise FileNotFoundError(path)
 
-    async def _glob(self, path, prefix="", **kwargs):
-        if not prefix:
-            # Identify pattern prefixes. Ripped from fsspec.spec.AbstractFileSystem.glob and matches
-            # the glob.has_magic patterns.
-            indstar = path.find("*") if path.find("*") >= 0 else len(path)
-            indques = path.find("?") if path.find("?") >= 0 else len(path)
-            indbrace = path.find("[") if path.find("[") >= 0 else len(path)
-
-            ind = min(indstar, indques, indbrace)
-            prefix = path[:ind].split("/")[-1]
-        return await super()._glob(path, prefix=prefix, **kwargs)
-
     async def _ls(self, path, detail=False, prefix="", versions=False, **kwargs):
         """List objects under the given '/{bucket}/{prefix} path."""
         path = self._strip_protocol(path).rstrip("/")
@@ -1252,6 +1240,9 @@ async def _find(
         else:
             _prefix = key
 
+        if _prefix != "" and await self._isdir(f"{bucket}/{_prefix}"):
+            _prefix = _prefix.rstrip("/") + "/"
+
         objects, _ = await self._do_list_objects(
             bucket, delimiter="", prefix=_prefix, versions=versions
         )
diff --git a/gcsfs/tests/test_core.py b/gcsfs/tests/test_core.py
index 51956e80..5b0e6761 100644
--- a/gcsfs/tests/test_core.py
+++ b/gcsfs/tests/test_core.py
@@ -285,10 +285,11 @@ def test_gcs_glob(gcs):
         for f in gcs.glob(TEST_BUCKET + "/nested/*")
         if gcs.isfile(f)
     )
+    # the following is no longer true since the glob method lists the root path
     # Ensure the glob only fetches prefixed folders
-    gcs.dircache.clear()
-    gcs.glob(TEST_BUCKET + "/nested**1")
-    assert all(d.startswith(TEST_BUCKET + "/nested") for d in gcs.dircache)
+    # gcs.dircache.clear()
+    # gcs.glob(TEST_BUCKET + "/nested**1")
+    # assert all(d.startswith(TEST_BUCKET + "/nested") for d in gcs.dircache)
     # the following is no longer true as of #437
     # gcs.glob(TEST_BUCKET + "/test*")
     # assert TEST_BUCKET + "/test" in gcs.dircache

From d1ff25f9870a5a09f287a68bd52183a699c41dfd Mon Sep 17 00:00:00 2001
From: Jonathan Langlois <jlanglois@lunit.io>
Date: Thu, 31 Aug 2023 08:38:58 +0900
Subject: [PATCH 2/2] feat: avoid calling isdir before fetching objects

---
 gcsfs/core.py | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/gcsfs/core.py b/gcsfs/core.py
index 09ba37b9..97ecb697 100644
--- a/gcsfs/core.py
+++ b/gcsfs/core.py
@@ -1387,24 +1387,27 @@ async def _find(
         **kwargs,
     ):
         path = self._strip_protocol(path)
-        bucket, key, generation = self.split_path(path)
 
         if maxdepth is not None and maxdepth < 1:
             raise ValueError("maxdepth must be at least 1")
 
-        if prefix:
-            _path = "" if not key else key.rstrip("/") + "/"
-            _prefix = f"{_path}{prefix}"
-        else:
-            _prefix = key
-
-        if _prefix != "" and await self._isdir(f"{bucket}/{_prefix}"):
-            _prefix = _prefix.rstrip("/") + "/"
-
+        # Fetch objects as if the path is a directory
         objects, _ = await self._do_list_objects(
-            bucket, delimiter="", prefix=_prefix, versions=versions
+            path, delimiter="", prefix=prefix, versions=versions
         )
 
+        if not objects:
+            # Fetch objects as if the path is a file
+            bucket, key, _ = self.split_path(path)
+            if prefix:
+                _path = "" if not key else key.rstrip("/") + "/"
+                _prefix = f"{_path}{prefix}"
+            else:
+                _prefix = key
+            objects, _ = await self._do_list_objects(
+                bucket, delimiter="", prefix=_prefix, versions=versions
+            )
+
         dirs = {}
         cache_entries = {}