diff --git a/utils/datasets.py b/utils/datasets.py
index ed18f449ddd3..4f5fbb4bc824 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -1033,19 +1033,26 @@ def extract_boxes(path='../coco128/'):  # from utils.datasets import *; extract_
                     assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}'
 
 
-def autosplit(path='../coco128', weights=(0.9, 0.1, 0.0)):  # from utils.datasets import *; autosplit('../coco128')
+def autosplit(path='../coco128', weights=(0.9, 0.1, 0.0), annotated_only=False):  # from utils.datasets import *; autosplit('../coco128')
     """ Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
     # Arguments
-        path:       Path to images directory
-        weights:    Train, val, test weights (list)
+        path:           Path to images directory
+        weights:        Train, val, test weights (list)
+        annotated_only: Only use images that have a matching label .txt file
     """
     path = Path(path)  # images dir
-    files = list(path.rglob('*.*'))
+    # Collect image files only, so n and the split indices are not inflated by label or other non-image files
+    files = [f for img_ext in img_formats for f in path.rglob(f'*.{img_ext}')]
     n = len(files)  # number of files
     indices = random.choices([0, 1, 2], weights=weights, k=n)  # assign each image to a split
     txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt']  # 3 txt files
     [(path / x).unlink() for x in txt if (path / x).exists()]  # remove existing
+
+    if annotated_only:
+        print("Only annotated images with a .txt file associated will be used to create the dataset")
+
     for i, img in tqdm(zip(indices, files), total=n):
-        if img.suffix[1:] in img_formats:
+        # When annotated_only is set, skip images whose label file does not exist
+        if not annotated_only or Path(img2label_paths([str(img)])[0]).exists():
             with open(path / txt[i], 'a') as f:
                 f.write(str(img) + '\n')  # add image to txt file