diff --git a/data/Argoverse.yaml b/data/Argoverse.yaml
new file mode 100644
index 000000000000..558151dc849e
--- /dev/null
+++ b/data/Argoverse.yaml
@@ -0,0 +1,74 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/ by Argo AI
+# Example usage: python train.py --data Argoverse.yaml
+# parent
+# ├── yolov5
+# └── datasets
+#     └── Argoverse  ← downloads here (31.3 GB)
+
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/Argoverse  # dataset root dir
+train: Argoverse-1.1/images/train/  # train images (relative to 'path') 39384 images
+val: Argoverse-1.1/images/val/  # val images (relative to 'path') 15062 images
+test: Argoverse-1.1/images/test/  # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview
+
+# Classes
+names:
+  0: person
+  1: bicycle
+  2: car
+  3: motorcycle
+  4: bus
+  5: truck
+  6: traffic_light
+  7: stop_sign
+
+
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
+download: |
+  import json
+
+  from tqdm import tqdm
+  from utils.general import download, Path
+
+
+  def argoverse2yolo(set):
+      labels = {}
+      a = json.load(open(set, "rb"))
+      for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."):
+          img_id = annot['image_id']
+          img_name = a['images'][img_id]['name']
+          img_label_name = f'{img_name[:-3]}txt'
+
+          cls = annot['category_id']  # instance class id
+          x_center, y_center, width, height = annot['bbox']
+          x_center = (x_center + width / 2) / 1920.0  # offset and scale
+          y_center = (y_center + height / 2) / 1200.0  # offset and scale
+          width /= 1920.0  # scale
+          height /= 1200.0  # scale
+
+          img_dir = set.parents[2] / 'Argoverse-1.1' / 'labels' / a['seq_dirs'][a['images'][annot['image_id']]['sid']]
+          if not img_dir.exists():
+              img_dir.mkdir(parents=True, exist_ok=True)
+
+          k = str(img_dir / img_label_name)
+          if k not in labels:
+              labels[k] = []
+          labels[k].append(f"{cls} {x_center} {y_center} {width} {height}\n")
+
+      for k in labels:
+          with open(k, "w") as f:
+              f.writelines(labels[k])
+
+
+  # Download
+  dir = Path(yaml['path'])  # dataset root dir
+  urls = ['https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip']
+  download(urls, dir=dir, delete=False)
+
+  # Convert
+  annotations_dir = 'Argoverse-HD/annotations/'
+  (dir / 'Argoverse-1.1' / 'tracking').rename(dir / 'Argoverse-1.1' / 'images')  # rename 'tracking' to 'images'
+  for d in "train.json", "val.json":
+      argoverse2yolo(dir / annotations_dir / d)  # convert VisDrone annotations to YOLO labels
diff --git a/data/GlobalWheat2020.yaml b/data/GlobalWheat2020.yaml
new file mode 100644
index 000000000000..01812d031bc5
--- /dev/null
+++ b/data/GlobalWheat2020.yaml
@@ -0,0 +1,54 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# Global Wheat 2020 dataset http://www.global-wheat.com/ by University of Saskatchewan
+# Example usage: python train.py --data GlobalWheat2020.yaml
+# parent
+# ├── yolov5
+# └── datasets
+#     └── GlobalWheat2020  ← downloads here (7.0 GB)
+
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/GlobalWheat2020  # dataset root dir
+train: # train images (relative to 'path') 3422 images
+  - images/arvalis_1
+  - images/arvalis_2
+  - images/arvalis_3
+  - images/ethz_1
+  - images/rres_1
+  - images/inrae_1
+  - images/usask_1
+val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1)
+  - images/ethz_1
+test: # test images (optional) 1276 images
+  - images/utokyo_1
+  - images/utokyo_2
+  - images/nau_1
+  - images/uq_1
+
+# Classes
+names:
+  0: wheat_head
+
+
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
+download: |
+  from utils.general import download, Path
+
+
+  # Download
+  dir = Path(yaml['path'])  # dataset root dir
+  urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
+          'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip']
+  download(urls, dir=dir)
+
+  # Make Directories
+  for p in 'annotations', 'images', 'labels':
+      (dir / p).mkdir(parents=True, exist_ok=True)
+
+  # Move
+  for p in 'arvalis_1', 'arvalis_2', 'arvalis_3', 'ethz_1', 'rres_1', 'inrae_1', 'usask_1', \
+           'utokyo_1', 'utokyo_2', 'nau_1', 'uq_1':
+      (dir / p).rename(dir / 'images' / p)  # move to /images
+      f = (dir / p).with_suffix('.json')  # json file
+      if f.exists():
+          f.rename((dir / 'annotations' / p).with_suffix('.json'))  # move to /annotations
diff --git a/data/ImageNet.yaml b/data/ImageNet.yaml
new file mode 100644
index 000000000000..14f12950605f
--- /dev/null
+++ b/data/ImageNet.yaml
@@ -0,0 +1,1022 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# ImageNet-1k dataset https://www.image-net.org/index.php by Stanford University
+# Simplified class names from https://github.com/anishathalye/imagenet-simple-labels
+# Example usage: python classify/train.py --data imagenet
+# parent
+# ├── yolov5
+# └── datasets
+#     └── imagenet  ← downloads here (144 GB)
+
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/imagenet  # dataset root dir
+train: train  # train images (relative to 'path') 1281167 images
+val: val  # val images (relative to 'path') 50000 images
+test:  # test images (optional)
+
+# Classes
+names:
+  0: tench
+  1: goldfish
+  2: great white shark
+  3: tiger shark
+  4: hammerhead shark
+  5: electric ray
+  6: stingray
+  7: cock
+  8: hen
+  9: ostrich
+  10: brambling
+  11: goldfinch
+  12: house finch
+  13: junco
+  14: indigo bunting
+  15: American robin
+  16: bulbul
+  17: jay
+  18: magpie
+  19: chickadee
+  20: American dipper
+  21: kite
+  22: bald eagle
+  23: vulture
+  24: great grey owl
+  25: fire salamander
+  26: smooth newt
+  27: newt
+  28: spotted salamander
+  29: axolotl
+  30: American bullfrog
+  31: tree frog
+  32: tailed frog
+  33: loggerhead sea turtle
+  34: leatherback sea turtle
+  35: mud turtle
+  36: terrapin
+  37: box turtle
+  38: banded gecko
+  39: green iguana
+  40: Carolina anole
+  41: desert grassland whiptail lizard
+  42: agama
+  43: frilled-necked lizard
+  44: alligator lizard
+  45: Gila monster
+  46: European green lizard
+  47: chameleon
+  48: Komodo dragon
+  49: Nile crocodile
+  50: American alligator
+  51: triceratops
+  52: worm snake
+  53: ring-necked snake
+  54: eastern hog-nosed snake
+  55: smooth green snake
+  56: kingsnake
+  57: garter snake
+  58: water snake
+  59: vine snake
+  60: night snake
+  61: boa constrictor
+  62: African rock python
+  63: Indian cobra
+  64: green mamba
+  65: sea snake
+  66: Saharan horned viper
+  67: eastern diamondback rattlesnake
+  68: sidewinder
+  69: trilobite
+  70: harvestman
+  71: scorpion
+  72: yellow garden spider
+  73: barn spider
+  74: European garden spider
+  75: southern black widow
+  76: tarantula
+  77: wolf spider
+  78: tick
+  79: centipede
+  80: black grouse
+  81: ptarmigan
+  82: ruffed grouse
+  83: prairie grouse
+  84: peacock
+  85: quail
+  86: partridge
+  87: grey parrot
+  88: macaw
+  89: sulphur-crested cockatoo
+  90: lorikeet
+  91: coucal
+  92: bee eater
+  93: hornbill
+  94: hummingbird
+  95: jacamar
+  96: toucan
+  97: duck
+  98: red-breasted merganser
+  99: goose
+  100: black swan
+  101: tusker
+  102: echidna
+  103: platypus
+  104: wallaby
+  105: koala
+  106: wombat
+  107: jellyfish
+  108: sea anemone
+  109: brain coral
+  110: flatworm
+  111: nematode
+  112: conch
+  113: snail
+  114: slug
+  115: sea slug
+  116: chiton
+  117: chambered nautilus
+  118: Dungeness crab
+  119: rock crab
+  120: fiddler crab
+  121: red king crab
+  122: American lobster
+  123: spiny lobster
+  124: crayfish
+  125: hermit crab
+  126: isopod
+  127: white stork
+  128: black stork
+  129: spoonbill
+  130: flamingo
+  131: little blue heron
+  132: great egret
+  133: bittern
+  134: crane (bird)
+  135: limpkin
+  136: common gallinule
+  137: American coot
+  138: bustard
+  139: ruddy turnstone
+  140: dunlin
+  141: common redshank
+  142: dowitcher
+  143: oystercatcher
+  144: pelican
+  145: king penguin
+  146: albatross
+  147: grey whale
+  148: killer whale
+  149: dugong
+  150: sea lion
+  151: Chihuahua
+  152: Japanese Chin
+  153: Maltese
+  154: Pekingese
+  155: Shih Tzu
+  156: King Charles Spaniel
+  157: Papillon
+  158: toy terrier
+  159: Rhodesian Ridgeback
+  160: Afghan Hound
+  161: Basset Hound
+  162: Beagle
+  163: Bloodhound
+  164: Bluetick Coonhound
+  165: Black and Tan Coonhound
+  166: Treeing Walker Coonhound
+  167: English foxhound
+  168: Redbone Coonhound
+  169: borzoi
+  170: Irish Wolfhound
+  171: Italian Greyhound
+  172: Whippet
+  173: Ibizan Hound
+  174: Norwegian Elkhound
+  175: Otterhound
+  176: Saluki
+  177: Scottish Deerhound
+  178: Weimaraner
+  179: Staffordshire Bull Terrier
+  180: American Staffordshire Terrier
+  181: Bedlington Terrier
+  182: Border Terrier
+  183: Kerry Blue Terrier
+  184: Irish Terrier
+  185: Norfolk Terrier
+  186: Norwich Terrier
+  187: Yorkshire Terrier
+  188: Wire Fox Terrier
+  189: Lakeland Terrier
+  190: Sealyham Terrier
+  191: Airedale Terrier
+  192: Cairn Terrier
+  193: Australian Terrier
+  194: Dandie Dinmont Terrier
+  195: Boston Terrier
+  196: Miniature Schnauzer
+  197: Giant Schnauzer
+  198: Standard Schnauzer
+  199: Scottish Terrier
+  200: Tibetan Terrier
+  201: Australian Silky Terrier
+  202: Soft-coated Wheaten Terrier
+  203: West Highland White Terrier
+  204: Lhasa Apso
+  205: Flat-Coated Retriever
+  206: Curly-coated Retriever
+  207: Golden Retriever
+  208: Labrador Retriever
+  209: Chesapeake Bay Retriever
+  210: German Shorthaired Pointer
+  211: Vizsla
+  212: English Setter
+  213: Irish Setter
+  214: Gordon Setter
+  215: Brittany
+  216: Clumber Spaniel
+  217: English Springer Spaniel
+  218: Welsh Springer Spaniel
+  219: Cocker Spaniels
+  220: Sussex Spaniel
+  221: Irish Water Spaniel
+  222: Kuvasz
+  223: Schipperke
+  224: Groenendael
+  225: Malinois
+  226: Briard
+  227: Australian Kelpie
+  228: Komondor
+  229: Old English Sheepdog
+  230: Shetland Sheepdog
+  231: collie
+  232: Border Collie
+  233: Bouvier des Flandres
+  234: Rottweiler
+  235: German Shepherd Dog
+  236: Dobermann
+  237: Miniature Pinscher
+  238: Greater Swiss Mountain Dog
+  239: Bernese Mountain Dog
+  240: Appenzeller Sennenhund
+  241: Entlebucher Sennenhund
+  242: Boxer
+  243: Bullmastiff
+  244: Tibetan Mastiff
+  245: French Bulldog
+  246: Great Dane
+  247: St. Bernard
+  248: husky
+  249: Alaskan Malamute
+  250: Siberian Husky
+  251: Dalmatian
+  252: Affenpinscher
+  253: Basenji
+  254: pug
+  255: Leonberger
+  256: Newfoundland
+  257: Pyrenean Mountain Dog
+  258: Samoyed
+  259: Pomeranian
+  260: Chow Chow
+  261: Keeshond
+  262: Griffon Bruxellois
+  263: Pembroke Welsh Corgi
+  264: Cardigan Welsh Corgi
+  265: Toy Poodle
+  266: Miniature Poodle
+  267: Standard Poodle
+  268: Mexican hairless dog
+  269: grey wolf
+  270: Alaskan tundra wolf
+  271: red wolf
+  272: coyote
+  273: dingo
+  274: dhole
+  275: African wild dog
+  276: hyena
+  277: red fox
+  278: kit fox
+  279: Arctic fox
+  280: grey fox
+  281: tabby cat
+  282: tiger cat
+  283: Persian cat
+  284: Siamese cat
+  285: Egyptian Mau
+  286: cougar
+  287: lynx
+  288: leopard
+  289: snow leopard
+  290: jaguar
+  291: lion
+  292: tiger
+  293: cheetah
+  294: brown bear
+  295: American black bear
+  296: polar bear
+  297: sloth bear
+  298: mongoose
+  299: meerkat
+  300: tiger beetle
+  301: ladybug
+  302: ground beetle
+  303: longhorn beetle
+  304: leaf beetle
+  305: dung beetle
+  306: rhinoceros beetle
+  307: weevil
+  308: fly
+  309: bee
+  310: ant
+  311: grasshopper
+  312: cricket
+  313: stick insect
+  314: cockroach
+  315: mantis
+  316: cicada
+  317: leafhopper
+  318: lacewing
+  319: dragonfly
+  320: damselfly
+  321: red admiral
+  322: ringlet
+  323: monarch butterfly
+  324: small white
+  325: sulphur butterfly
+  326: gossamer-winged butterfly
+  327: starfish
+  328: sea urchin
+  329: sea cucumber
+  330: cottontail rabbit
+  331: hare
+  332: Angora rabbit
+  333: hamster
+  334: porcupine
+  335: fox squirrel
+  336: marmot
+  337: beaver
+  338: guinea pig
+  339: common sorrel
+  340: zebra
+  341: pig
+  342: wild boar
+  343: warthog
+  344: hippopotamus
+  345: ox
+  346: water buffalo
+  347: bison
+  348: ram
+  349: bighorn sheep
+  350: Alpine ibex
+  351: hartebeest
+  352: impala
+  353: gazelle
+  354: dromedary
+  355: llama
+  356: weasel
+  357: mink
+  358: European polecat
+  359: black-footed ferret
+  360: otter
+  361: skunk
+  362: badger
+  363: armadillo
+  364: three-toed sloth
+  365: orangutan
+  366: gorilla
+  367: chimpanzee
+  368: gibbon
+  369: siamang
+  370: guenon
+  371: patas monkey
+  372: baboon
+  373: macaque
+  374: langur
+  375: black-and-white colobus
+  376: proboscis monkey
+  377: marmoset
+  378: white-headed capuchin
+  379: howler monkey
+  380: titi
+  381: Geoffroy's spider monkey
+  382: common squirrel monkey
+  383: ring-tailed lemur
+  384: indri
+  385: Asian elephant
+  386: African bush elephant
+  387: red panda
+  388: giant panda
+  389: snoek
+  390: eel
+  391: coho salmon
+  392: rock beauty
+  393: clownfish
+  394: sturgeon
+  395: garfish
+  396: lionfish
+  397: pufferfish
+  398: abacus
+  399: abaya
+  400: academic gown
+  401: accordion
+  402: acoustic guitar
+  403: aircraft carrier
+  404: airliner
+  405: airship
+  406: altar
+  407: ambulance
+  408: amphibious vehicle
+  409: analog clock
+  410: apiary
+  411: apron
+  412: waste container
+  413: assault rifle
+  414: backpack
+  415: bakery
+  416: balance beam
+  417: balloon
+  418: ballpoint pen
+  419: Band-Aid
+  420: banjo
+  421: baluster
+  422: barbell
+  423: barber chair
+  424: barbershop
+  425: barn
+  426: barometer
+  427: barrel
+  428: wheelbarrow
+  429: baseball
+  430: basketball
+  431: bassinet
+  432: bassoon
+  433: swimming cap
+  434: bath towel
+  435: bathtub
+  436: station wagon
+  437: lighthouse
+  438: beaker
+  439: military cap
+  440: beer bottle
+  441: beer glass
+  442: bell-cot
+  443: bib
+  444: tandem bicycle
+  445: bikini
+  446: ring binder
+  447: binoculars
+  448: birdhouse
+  449: boathouse
+  450: bobsleigh
+  451: bolo tie
+  452: poke bonnet
+  453: bookcase
+  454: bookstore
+  455: bottle cap
+  456: bow
+  457: bow tie
+  458: brass
+  459: bra
+  460: breakwater
+  461: breastplate
+  462: broom
+  463: bucket
+  464: buckle
+  465: bulletproof vest
+  466: high-speed train
+  467: butcher shop
+  468: taxicab
+  469: cauldron
+  470: candle
+  471: cannon
+  472: canoe
+  473: can opener
+  474: cardigan
+  475: car mirror
+  476: carousel
+  477: tool kit
+  478: carton
+  479: car wheel
+  480: automated teller machine
+  481: cassette
+  482: cassette player
+  483: castle
+  484: catamaran
+  485: CD player
+  486: cello
+  487: mobile phone
+  488: chain
+  489: chain-link fence
+  490: chain mail
+  491: chainsaw
+  492: chest
+  493: chiffonier
+  494: chime
+  495: china cabinet
+  496: Christmas stocking
+  497: church
+  498: movie theater
+  499: cleaver
+  500: cliff dwelling
+  501: cloak
+  502: clogs
+  503: cocktail shaker
+  504: coffee mug
+  505: coffeemaker
+  506: coil
+  507: combination lock
+  508: computer keyboard
+  509: confectionery store
+  510: container ship
+  511: convertible
+  512: corkscrew
+  513: cornet
+  514: cowboy boot
+  515: cowboy hat
+  516: cradle
+  517: crane (machine)
+  518: crash helmet
+  519: crate
+  520: infant bed
+  521: Crock Pot
+  522: croquet ball
+  523: crutch
+  524: cuirass
+  525: dam
+  526: desk
+  527: desktop computer
+  528: rotary dial telephone
+  529: diaper
+  530: digital clock
+  531: digital watch
+  532: dining table
+  533: dishcloth
+  534: dishwasher
+  535: disc brake
+  536: dock
+  537: dog sled
+  538: dome
+  539: doormat
+  540: drilling rig
+  541: drum
+  542: drumstick
+  543: dumbbell
+  544: Dutch oven
+  545: electric fan
+  546: electric guitar
+  547: electric locomotive
+  548: entertainment center
+  549: envelope
+  550: espresso machine
+  551: face powder
+  552: feather boa
+  553: filing cabinet
+  554: fireboat
+  555: fire engine
+  556: fire screen sheet
+  557: flagpole
+  558: flute
+  559: folding chair
+  560: football helmet
+  561: forklift
+  562: fountain
+  563: fountain pen
+  564: four-poster bed
+  565: freight car
+  566: French horn
+  567: frying pan
+  568: fur coat
+  569: garbage truck
+  570: gas mask
+  571: gas pump
+  572: goblet
+  573: go-kart
+  574: golf ball
+  575: golf cart
+  576: gondola
+  577: gong
+  578: gown
+  579: grand piano
+  580: greenhouse
+  581: grille
+  582: grocery store
+  583: guillotine
+  584: barrette
+  585: hair spray
+  586: half-track
+  587: hammer
+  588: hamper
+  589: hair dryer
+  590: hand-held computer
+  591: handkerchief
+  592: hard disk drive
+  593: harmonica
+  594: harp
+  595: harvester
+  596: hatchet
+  597: holster
+  598: home theater
+  599: honeycomb
+  600: hook
+  601: hoop skirt
+  602: horizontal bar
+  603: horse-drawn vehicle
+  604: hourglass
+  605: iPod
+  606: clothes iron
+  607: jack-o'-lantern
+  608: jeans
+  609: jeep
+  610: T-shirt
+  611: jigsaw puzzle
+  612: pulled rickshaw
+  613: joystick
+  614: kimono
+  615: knee pad
+  616: knot
+  617: lab coat
+  618: ladle
+  619: lampshade
+  620: laptop computer
+  621: lawn mower
+  622: lens cap
+  623: paper knife
+  624: library
+  625: lifeboat
+  626: lighter
+  627: limousine
+  628: ocean liner
+  629: lipstick
+  630: slip-on shoe
+  631: lotion
+  632: speaker
+  633: loupe
+  634: sawmill
+  635: magnetic compass
+  636: mail bag
+  637: mailbox
+  638: tights
+  639: tank suit
+  640: manhole cover
+  641: maraca
+  642: marimba
+  643: mask
+  644: match
+  645: maypole
+  646: maze
+  647: measuring cup
+  648: medicine chest
+  649: megalith
+  650: microphone
+  651: microwave oven
+  652: military uniform
+  653: milk can
+  654: minibus
+  655: miniskirt
+  656: minivan
+  657: missile
+  658: mitten
+  659: mixing bowl
+  660: mobile home
+  661: Model T
+  662: modem
+  663: monastery
+  664: monitor
+  665: moped
+  666: mortar
+  667: square academic cap
+  668: mosque
+  669: mosquito net
+  670: scooter
+  671: mountain bike
+  672: tent
+  673: computer mouse
+  674: mousetrap
+  675: moving van
+  676: muzzle
+  677: nail
+  678: neck brace
+  679: necklace
+  680: nipple
+  681: notebook computer
+  682: obelisk
+  683: oboe
+  684: ocarina
+  685: odometer
+  686: oil filter
+  687: organ
+  688: oscilloscope
+  689: overskirt
+  690: bullock cart
+  691: oxygen mask
+  692: packet
+  693: paddle
+  694: paddle wheel
+  695: padlock
+  696: paintbrush
+  697: pajamas
+  698: palace
+  699: pan flute
+  700: paper towel
+  701: parachute
+  702: parallel bars
+  703: park bench
+  704: parking meter
+  705: passenger car
+  706: patio
+  707: payphone
+  708: pedestal
+  709: pencil case
+  710: pencil sharpener
+  711: perfume
+  712: Petri dish
+  713: photocopier
+  714: plectrum
+  715: Pickelhaube
+  716: picket fence
+  717: pickup truck
+  718: pier
+  719: piggy bank
+  720: pill bottle
+  721: pillow
+  722: ping-pong ball
+  723: pinwheel
+  724: pirate ship
+  725: pitcher
+  726: hand plane
+  727: planetarium
+  728: plastic bag
+  729: plate rack
+  730: plow
+  731: plunger
+  732: Polaroid camera
+  733: pole
+  734: police van
+  735: poncho
+  736: billiard table
+  737: soda bottle
+  738: pot
+  739: potter's wheel
+  740: power drill
+  741: prayer rug
+  742: printer
+  743: prison
+  744: projectile
+  745: projector
+  746: hockey puck
+  747: punching bag
+  748: purse
+  749: quill
+  750: quilt
+  751: race car
+  752: racket
+  753: radiator
+  754: radio
+  755: radio telescope
+  756: rain barrel
+  757: recreational vehicle
+  758: reel
+  759: reflex camera
+  760: refrigerator
+  761: remote control
+  762: restaurant
+  763: revolver
+  764: rifle
+  765: rocking chair
+  766: rotisserie
+  767: eraser
+  768: rugby ball
+  769: ruler
+  770: running shoe
+  771: safe
+  772: safety pin
+  773: salt shaker
+  774: sandal
+  775: sarong
+  776: saxophone
+  777: scabbard
+  778: weighing scale
+  779: school bus
+  780: schooner
+  781: scoreboard
+  782: CRT screen
+  783: screw
+  784: screwdriver
+  785: seat belt
+  786: sewing machine
+  787: shield
+  788: shoe store
+  789: shoji
+  790: shopping basket
+  791: shopping cart
+  792: shovel
+  793: shower cap
+  794: shower curtain
+  795: ski
+  796: ski mask
+  797: sleeping bag
+  798: slide rule
+  799: sliding door
+  800: slot machine
+  801: snorkel
+  802: snowmobile
+  803: snowplow
+  804: soap dispenser
+  805: soccer ball
+  806: sock
+  807: solar thermal collector
+  808: sombrero
+  809: soup bowl
+  810: space bar
+  811: space heater
+  812: space shuttle
+  813: spatula
+  814: motorboat
+  815: spider web
+  816: spindle
+  817: sports car
+  818: spotlight
+  819: stage
+  820: steam locomotive
+  821: through arch bridge
+  822: steel drum
+  823: stethoscope
+  824: scarf
+  825: stone wall
+  826: stopwatch
+  827: stove
+  828: strainer
+  829: tram
+  830: stretcher
+  831: couch
+  832: stupa
+  833: submarine
+  834: suit
+  835: sundial
+  836: sunglass
+  837: sunglasses
+  838: sunscreen
+  839: suspension bridge
+  840: mop
+  841: sweatshirt
+  842: swimsuit
+  843: swing
+  844: switch
+  845: syringe
+  846: table lamp
+  847: tank
+  848: tape player
+  849: teapot
+  850: teddy bear
+  851: television
+  852: tennis ball
+  853: thatched roof
+  854: front curtain
+  855: thimble
+  856: threshing machine
+  857: throne
+  858: tile roof
+  859: toaster
+  860: tobacco shop
+  861: toilet seat
+  862: torch
+  863: totem pole
+  864: tow truck
+  865: toy store
+  866: tractor
+  867: semi-trailer truck
+  868: tray
+  869: trench coat
+  870: tricycle
+  871: trimaran
+  872: tripod
+  873: triumphal arch
+  874: trolleybus
+  875: trombone
+  876: tub
+  877: turnstile
+  878: typewriter keyboard
+  879: umbrella
+  880: unicycle
+  881: upright piano
+  882: vacuum cleaner
+  883: vase
+  884: vault
+  885: velvet
+  886: vending machine
+  887: vestment
+  888: viaduct
+  889: violin
+  890: volleyball
+  891: waffle iron
+  892: wall clock
+  893: wallet
+  894: wardrobe
+  895: military aircraft
+  896: sink
+  897: washing machine
+  898: water bottle
+  899: water jug
+  900: water tower
+  901: whiskey jug
+  902: whistle
+  903: wig
+  904: window screen
+  905: window shade
+  906: Windsor tie
+  907: wine bottle
+  908: wing
+  909: wok
+  910: wooden spoon
+  911: wool
+  912: split-rail fence
+  913: shipwreck
+  914: yawl
+  915: yurt
+  916: website
+  917: comic book
+  918: crossword
+  919: traffic sign
+  920: traffic light
+  921: dust jacket
+  922: menu
+  923: plate
+  924: guacamole
+  925: consomme
+  926: hot pot
+  927: trifle
+  928: ice cream
+  929: ice pop
+  930: baguette
+  931: bagel
+  932: pretzel
+  933: cheeseburger
+  934: hot dog
+  935: mashed potato
+  936: cabbage
+  937: broccoli
+  938: cauliflower
+  939: zucchini
+  940: spaghetti squash
+  941: acorn squash
+  942: butternut squash
+  943: cucumber
+  944: artichoke
+  945: bell pepper
+  946: cardoon
+  947: mushroom
+  948: Granny Smith
+  949: strawberry
+  950: orange
+  951: lemon
+  952: fig
+  953: pineapple
+  954: banana
+  955: jackfruit
+  956: custard apple
+  957: pomegranate
+  958: hay
+  959: carbonara
+  960: chocolate syrup
+  961: dough
+  962: meatloaf
+  963: pizza
+  964: pot pie
+  965: burrito
+  966: red wine
+  967: espresso
+  968: cup
+  969: eggnog
+  970: alp
+  971: bubble
+  972: cliff
+  973: coral reef
+  974: geyser
+  975: lakeshore
+  976: promontory
+  977: shoal
+  978: seashore
+  979: valley
+  980: volcano
+  981: baseball player
+  982: bridegroom
+  983: scuba diver
+  984: rapeseed
+  985: daisy
+  986: yellow lady's slipper
+  987: corn
+  988: acorn
+  989: rose hip
+  990: horse chestnut seed
+  991: coral fungus
+  992: agaric
+  993: gyromitra
+  994: stinkhorn mushroom
+  995: earth star
+  996: hen-of-the-woods
+  997: bolete
+  998: ear
+  999: toilet paper
+
+
+# Download script/URL (optional)
+download: data/scripts/get_imagenet.sh
diff --git a/data/Objects365.yaml b/data/Objects365.yaml
new file mode 100644
index 000000000000..05b26a1f4796
--- /dev/null
+++ b/data/Objects365.yaml
@@ -0,0 +1,438 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# Objects365 dataset https://www.objects365.org/ by Megvii
+# Example usage: python train.py --data Objects365.yaml
+# parent
+# ├── yolov5
+# └── datasets
+#     └── Objects365  ← downloads here (712 GB = 367G data + 345G zips)
+
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/Objects365  # dataset root dir
+train: images/train  # train images (relative to 'path') 1742289 images
+val: images/val # val images (relative to 'path') 80000 images
+test:  # test images (optional)
+
+# Classes
+names:
+  0: Person
+  1: Sneakers
+  2: Chair
+  3: Other Shoes
+  4: Hat
+  5: Car
+  6: Lamp
+  7: Glasses
+  8: Bottle
+  9: Desk
+  10: Cup
+  11: Street Lights
+  12: Cabinet/shelf
+  13: Handbag/Satchel
+  14: Bracelet
+  15: Plate
+  16: Picture/Frame
+  17: Helmet
+  18: Book
+  19: Gloves
+  20: Storage box
+  21: Boat
+  22: Leather Shoes
+  23: Flower
+  24: Bench
+  25: Potted Plant
+  26: Bowl/Basin
+  27: Flag
+  28: Pillow
+  29: Boots
+  30: Vase
+  31: Microphone
+  32: Necklace
+  33: Ring
+  34: SUV
+  35: Wine Glass
+  36: Belt
+  37: Monitor/TV
+  38: Backpack
+  39: Umbrella
+  40: Traffic Light
+  41: Speaker
+  42: Watch
+  43: Tie
+  44: Trash bin Can
+  45: Slippers
+  46: Bicycle
+  47: Stool
+  48: Barrel/bucket
+  49: Van
+  50: Couch
+  51: Sandals
+  52: Basket
+  53: Drum
+  54: Pen/Pencil
+  55: Bus
+  56: Wild Bird
+  57: High Heels
+  58: Motorcycle
+  59: Guitar
+  60: Carpet
+  61: Cell Phone
+  62: Bread
+  63: Camera
+  64: Canned
+  65: Truck
+  66: Traffic cone
+  67: Cymbal
+  68: Lifesaver
+  69: Towel
+  70: Stuffed Toy
+  71: Candle
+  72: Sailboat
+  73: Laptop
+  74: Awning
+  75: Bed
+  76: Faucet
+  77: Tent
+  78: Horse
+  79: Mirror
+  80: Power outlet
+  81: Sink
+  82: Apple
+  83: Air Conditioner
+  84: Knife
+  85: Hockey Stick
+  86: Paddle
+  87: Pickup Truck
+  88: Fork
+  89: Traffic Sign
+  90: Balloon
+  91: Tripod
+  92: Dog
+  93: Spoon
+  94: Clock
+  95: Pot
+  96: Cow
+  97: Cake
+  98: Dinning Table
+  99: Sheep
+  100: Hanger
+  101: Blackboard/Whiteboard
+  102: Napkin
+  103: Other Fish
+  104: Orange/Tangerine
+  105: Toiletry
+  106: Keyboard
+  107: Tomato
+  108: Lantern
+  109: Machinery Vehicle
+  110: Fan
+  111: Green Vegetables
+  112: Banana
+  113: Baseball Glove
+  114: Airplane
+  115: Mouse
+  116: Train
+  117: Pumpkin
+  118: Soccer
+  119: Skiboard
+  120: Luggage
+  121: Nightstand
+  122: Tea pot
+  123: Telephone
+  124: Trolley
+  125: Head Phone
+  126: Sports Car
+  127: Stop Sign
+  128: Dessert
+  129: Scooter
+  130: Stroller
+  131: Crane
+  132: Remote
+  133: Refrigerator
+  134: Oven
+  135: Lemon
+  136: Duck
+  137: Baseball Bat
+  138: Surveillance Camera
+  139: Cat
+  140: Jug
+  141: Broccoli
+  142: Piano
+  143: Pizza
+  144: Elephant
+  145: Skateboard
+  146: Surfboard
+  147: Gun
+  148: Skating and Skiing shoes
+  149: Gas stove
+  150: Donut
+  151: Bow Tie
+  152: Carrot
+  153: Toilet
+  154: Kite
+  155: Strawberry
+  156: Other Balls
+  157: Shovel
+  158: Pepper
+  159: Computer Box
+  160: Toilet Paper
+  161: Cleaning Products
+  162: Chopsticks
+  163: Microwave
+  164: Pigeon
+  165: Baseball
+  166: Cutting/chopping Board
+  167: Coffee Table
+  168: Side Table
+  169: Scissors
+  170: Marker
+  171: Pie
+  172: Ladder
+  173: Snowboard
+  174: Cookies
+  175: Radiator
+  176: Fire Hydrant
+  177: Basketball
+  178: Zebra
+  179: Grape
+  180: Giraffe
+  181: Potato
+  182: Sausage
+  183: Tricycle
+  184: Violin
+  185: Egg
+  186: Fire Extinguisher
+  187: Candy
+  188: Fire Truck
+  189: Billiards
+  190: Converter
+  191: Bathtub
+  192: Wheelchair
+  193: Golf Club
+  194: Briefcase
+  195: Cucumber
+  196: Cigar/Cigarette
+  197: Paint Brush
+  198: Pear
+  199: Heavy Truck
+  200: Hamburger
+  201: Extractor
+  202: Extension Cord
+  203: Tong
+  204: Tennis Racket
+  205: Folder
+  206: American Football
+  207: earphone
+  208: Mask
+  209: Kettle
+  210: Tennis
+  211: Ship
+  212: Swing
+  213: Coffee Machine
+  214: Slide
+  215: Carriage
+  216: Onion
+  217: Green beans
+  218: Projector
+  219: Frisbee
+  220: Washing Machine/Drying Machine
+  221: Chicken
+  222: Printer
+  223: Watermelon
+  224: Saxophone
+  225: Tissue
+  226: Toothbrush
+  227: Ice cream
+  228: Hot-air balloon
+  229: Cello
+  230: French Fries
+  231: Scale
+  232: Trophy
+  233: Cabbage
+  234: Hot dog
+  235: Blender
+  236: Peach
+  237: Rice
+  238: Wallet/Purse
+  239: Volleyball
+  240: Deer
+  241: Goose
+  242: Tape
+  243: Tablet
+  244: Cosmetics
+  245: Trumpet
+  246: Pineapple
+  247: Golf Ball
+  248: Ambulance
+  249: Parking meter
+  250: Mango
+  251: Key
+  252: Hurdle
+  253: Fishing Rod
+  254: Medal
+  255: Flute
+  256: Brush
+  257: Penguin
+  258: Megaphone
+  259: Corn
+  260: Lettuce
+  261: Garlic
+  262: Swan
+  263: Helicopter
+  264: Green Onion
+  265: Sandwich
+  266: Nuts
+  267: Speed Limit Sign
+  268: Induction Cooker
+  269: Broom
+  270: Trombone
+  271: Plum
+  272: Rickshaw
+  273: Goldfish
+  274: Kiwi fruit
+  275: Router/modem
+  276: Poker Card
+  277: Toaster
+  278: Shrimp
+  279: Sushi
+  280: Cheese
+  281: Notepaper
+  282: Cherry
+  283: Pliers
+  284: CD
+  285: Pasta
+  286: Hammer
+  287: Cue
+  288: Avocado
+  289: Hamimelon
+  290: Flask
+  291: Mushroom
+  292: Screwdriver
+  293: Soap
+  294: Recorder
+  295: Bear
+  296: Eggplant
+  297: Board Eraser
+  298: Coconut
+  299: Tape Measure/Ruler
+  300: Pig
+  301: Showerhead
+  302: Globe
+  303: Chips
+  304: Steak
+  305: Crosswalk Sign
+  306: Stapler
+  307: Camel
+  308: Formula 1
+  309: Pomegranate
+  310: Dishwasher
+  311: Crab
+  312: Hoverboard
+  313: Meat ball
+  314: Rice Cooker
+  315: Tuba
+  316: Calculator
+  317: Papaya
+  318: Antelope
+  319: Parrot
+  320: Seal
+  321: Butterfly
+  322: Dumbbell
+  323: Donkey
+  324: Lion
+  325: Urinal
+  326: Dolphin
+  327: Electric Drill
+  328: Hair Dryer
+  329: Egg tart
+  330: Jellyfish
+  331: Treadmill
+  332: Lighter
+  333: Grapefruit
+  334: Game board
+  335: Mop
+  336: Radish
+  337: Baozi
+  338: Target
+  339: French
+  340: Spring Rolls
+  341: Monkey
+  342: Rabbit
+  343: Pencil Case
+  344: Yak
+  345: Red Cabbage
+  346: Binoculars
+  347: Asparagus
+  348: Barbell
+  349: Scallop
+  350: Noddles
+  351: Comb
+  352: Dumpling
+  353: Oyster
+  354: Table Tennis paddle
+  355: Cosmetics Brush/Eyeliner Pencil
+  356: Chainsaw
+  357: Eraser
+  358: Lobster
+  359: Durian
+  360: Okra
+  361: Lipstick
+  362: Cosmetics Mirror
+  363: Curling
+  364: Table Tennis
+
+
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
+download: |
+  from tqdm import tqdm
+
+  from utils.general import Path, check_requirements, download, np, xyxy2xywhn
+
+  check_requirements(('pycocotools>=2.0',))
+  from pycocotools.coco import COCO
+
+  # Make Directories
+  dir = Path(yaml['path'])  # dataset root dir
+  for p in 'images', 'labels':
+      (dir / p).mkdir(parents=True, exist_ok=True)
+      for q in 'train', 'val':
+          (dir / p / q).mkdir(parents=True, exist_ok=True)
+
+  # Train, Val Splits
+  for split, patches in [('train', 50 + 1), ('val', 43 + 1)]:
+      print(f"Processing {split} in {patches} patches ...")
+      images, labels = dir / 'images' / split, dir / 'labels' / split
+
+      # Download
+      url = f"https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/{split}/"
+      if split == 'train':
+          download([f'{url}zhiyuan_objv2_{split}.tar.gz'], dir=dir, delete=False)  # annotations json
+          download([f'{url}patch{i}.tar.gz' for i in range(patches)], dir=images, curl=True, delete=False, threads=8)
+      elif split == 'val':
+          download([f'{url}zhiyuan_objv2_{split}.json'], dir=dir, delete=False)  # annotations json
+          download([f'{url}images/v1/patch{i}.tar.gz' for i in range(15 + 1)], dir=images, curl=True, delete=False, threads=8)
+          download([f'{url}images/v2/patch{i}.tar.gz' for i in range(16, patches)], dir=images, curl=True, delete=False, threads=8)
+
+      # Move
+      for f in tqdm(images.rglob('*.jpg'), desc=f'Moving {split} images'):
+          f.rename(images / f.name)  # move to /images/{split}
+
+      # Labels
+      coco = COCO(dir / f'zhiyuan_objv2_{split}.json')
+      names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
+      for cid, cat in enumerate(names):
+          catIds = coco.getCatIds(catNms=[cat])
+          imgIds = coco.getImgIds(catIds=catIds)
+          for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'):
+              width, height = im["width"], im["height"]
+              path = Path(im["file_name"])  # image filename
+              try:
+                  with open(labels / path.with_suffix('.txt').name, 'a') as file:
+                      annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None)
+                      for a in coco.loadAnns(annIds):
+                          x, y, w, h = a['bbox']  # bounding box in xywh (xy top-left corner)
+                          xyxy = np.array([x, y, x + w, y + h])[None]  # pixels(1,4)
+                          x, y, w, h = xyxy2xywhn(xyxy, w=width, h=height, clip=True)[0]  # normalized and clipped
+                          file.write(f"{cid} {x:.5f} {y:.5f} {w:.5f} {h:.5f}\n")
+              except Exception as e:
+                  print(e)
diff --git a/data/SKU-110K.yaml b/data/SKU-110K.yaml
new file mode 100644
index 000000000000..edae7171c660
--- /dev/null
+++ b/data/SKU-110K.yaml
@@ -0,0 +1,53 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19 by Trax Retail
+# Example usage: python train.py --data SKU-110K.yaml
+# parent
+# ├── yolov5
+# └── datasets
+#     └── SKU-110K  ← downloads here (13.6 GB)
+
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/SKU-110K  # dataset root dir
+train: train.txt  # train images (relative to 'path')  8219 images
+val: val.txt  # val images (relative to 'path')  588 images
+test: test.txt  # test images (optional)  2936 images
+
+# Classes
+names:
+  0: object
+
+
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
+download: |
+  import shutil
+  from tqdm import tqdm
+  from utils.general import np, pd, Path, download, xyxy2xywh
+
+
+  # Download
+  dir = Path(yaml['path'])  # dataset root dir
+  parent = Path(dir.parent)  # download dir
+  urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz']
+  download(urls, dir=parent, delete=False)
+
+  # Rename directories
+  if dir.exists():
+      shutil.rmtree(dir)
+  (parent / 'SKU110K_fixed').rename(dir)  # rename dir
+  (dir / 'labels').mkdir(parents=True, exist_ok=True)  # create labels dir
+
+  # Convert labels
+  names = 'image', 'x1', 'y1', 'x2', 'y2', 'class', 'image_width', 'image_height'  # column names
+  for d in 'annotations_train.csv', 'annotations_val.csv', 'annotations_test.csv':
+      x = pd.read_csv(dir / 'annotations' / d, names=names).values  # annotations
+      images, unique_images = x[:, 0], np.unique(x[:, 0])
+      with open((dir / d).with_suffix('.txt').__str__().replace('annotations_', ''), 'w') as f:
+          f.writelines(f'./images/{s}\n' for s in unique_images)
+      for im in tqdm(unique_images, desc=f'Converting {dir / d}'):
+          cls = 0  # single-class dataset
+          with open((dir / 'labels' / im).with_suffix('.txt'), 'a') as f:
+              for r in x[images == im]:
+                  w, h = r[6], r[7]  # image width, height
+                  xywh = xyxy2xywh(np.array([[r[1] / w, r[2] / h, r[3] / w, r[4] / h]]))[0]  # instance
+                  f.write(f"{cls} {xywh[0]:.5f} {xywh[1]:.5f} {xywh[2]:.5f} {xywh[3]:.5f}\n")  # write label
diff --git a/data/VOC.yaml b/data/VOC.yaml
new file mode 100644
index 000000000000..27d38109c53a
--- /dev/null
+++ b/data/VOC.yaml
@@ -0,0 +1,100 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford
+# Example usage: python train.py --data VOC.yaml
+# parent
+# ├── yolov5
+# └── datasets
+#     └── VOC  ← downloads here (2.8 GB)
+
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/VOC
+train: # train images (relative to 'path')  16551 images
+  - images/train2012
+  - images/train2007
+  - images/val2012
+  - images/val2007
+val: # val images (relative to 'path')  4952 images
+  - images/test2007
+test: # test images (optional)
+  - images/test2007
+
+# Classes
+names:
+  0: aeroplane
+  1: bicycle
+  2: bird
+  3: boat
+  4: bottle
+  5: bus
+  6: car
+  7: cat
+  8: chair
+  9: cow
+  10: diningtable
+  11: dog
+  12: horse
+  13: motorbike
+  14: person
+  15: pottedplant
+  16: sheep
+  17: sofa
+  18: train
+  19: tvmonitor
+
+
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
+download: |
+  import xml.etree.ElementTree as ET
+
+  from tqdm import tqdm
+  from utils.general import download, Path
+
+
+  def convert_label(path, lb_path, year, image_id):
+      def convert_box(size, box):
+          dw, dh = 1. / size[0], 1. / size[1]
+          x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
+          return x * dw, y * dh, w * dw, h * dh
+
+      in_file = open(path / f'VOC{year}/Annotations/{image_id}.xml')
+      out_file = open(lb_path, 'w')
+      tree = ET.parse(in_file)
+      root = tree.getroot()
+      size = root.find('size')
+      w = int(size.find('width').text)
+      h = int(size.find('height').text)
+
+      names = list(yaml['names'].values())  # names list
+      for obj in root.iter('object'):
+          cls = obj.find('name').text
+          if cls in names and int(obj.find('difficult').text) != 1:
+              xmlbox = obj.find('bndbox')
+              bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
+              cls_id = names.index(cls)  # class id
+              out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n')
+
+
+  # Download
+  dir = Path(yaml['path'])  # dataset root dir
+  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
+  urls = [f'{url}VOCtrainval_06-Nov-2007.zip',  # 446MB, 5012 images
+          f'{url}VOCtest_06-Nov-2007.zip',  # 438MB, 4953 images
+          f'{url}VOCtrainval_11-May-2012.zip']  # 1.95GB, 17126 images
+  download(urls, dir=dir / 'images', delete=False, curl=True, threads=3)
+
+  # Convert
+  path = dir / 'images/VOCdevkit'
+  for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'):
+      imgs_path = dir / 'images' / f'{image_set}{year}'
+      lbs_path = dir / 'labels' / f'{image_set}{year}'
+      imgs_path.mkdir(exist_ok=True, parents=True)
+      lbs_path.mkdir(exist_ok=True, parents=True)
+
+      with open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt') as f:
+          image_ids = f.read().strip().split()
+      for id in tqdm(image_ids, desc=f'{image_set}{year}'):
+          f = path / f'VOC{year}/JPEGImages/{id}.jpg'  # old img path
+          lb_path = (lbs_path / f.name).with_suffix('.txt')  # new label path
+          f.rename(imgs_path / f.name)  # move image
+          convert_label(path, lb_path, year, id)  # convert labels to YOLO format
diff --git a/data/visdrone.yaml b/data/VisDrone.yaml
similarity index 66%
rename from data/visdrone.yaml
rename to data/VisDrone.yaml
index c23e6bc286f8..a8bcf8e628ec 100644
--- a/data/visdrone.yaml
+++ b/data/VisDrone.yaml
@@ -1,30 +1,35 @@
-# VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset
-# Train command: python train.py --data visdrone.yaml
-# Default dataset location is next to YOLOv5:
-#   /parent_folder
-#     /VisDrone
-#     /yolov5
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset by Tianjin University
+# Example usage: python train.py --data VisDrone.yaml
+# parent
+# ├── yolov5
+# └── datasets
+#     └── VisDrone  ← downloads here (2.3 GB)
 
 
-# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
-train: ../VisDrone/VisDrone2019-DET-train/images  # 6471 images
-val: ../VisDrone/VisDrone2019-DET-val/images  # 548 images
-test: ../VisDrone/VisDrone2019-DET-test-dev/images  # 1610 images
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/VisDrone  # dataset root dir
+train: VisDrone2019-DET-train/images  # train images (relative to 'path')  6471 images
+val: VisDrone2019-DET-val/images  # val images (relative to 'path')  548 images
+test: VisDrone2019-DET-test-dev/images  # test images (optional)  1610 images
 
-# number of classes
-nc: 10
+# Classes
+names:
+  0: pedestrian
+  1: people
+  2: bicycle
+  3: car
+  4: van
+  5: truck
+  6: tricycle
+  7: awning-tricycle
+  8: bus
+  9: motor
 
-# class names
-names: [ 'pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor' ]
 
-
-# download command/URL (optional) --------------------------------------------------------------------------------------
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
-  import os
-  from pathlib import Path
-
-  from utils.general import download
-
+  from utils.general import download, os, Path
 
   def visdrone2yolo(dir):
       from PIL import Image
@@ -53,12 +58,12 @@ download: |
 
 
   # Download
-  dir = Path('../VisDrone')  # dataset directory
+  dir = Path(yaml['path'])  # dataset root dir
   urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip',
           'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
           'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
           'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip']
-  download(urls, dir=dir)
+  download(urls, dir=dir, curl=True, threads=4)
 
   # Convert
   for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':
diff --git a/data/argoverse_hd.yaml b/data/argoverse_hd.yaml
deleted file mode 100644
index 0ba314d82ce1..000000000000
--- a/data/argoverse_hd.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
-# Train command: python train.py --data argoverse_hd.yaml
-# Default dataset location is next to YOLOv5:
-#   /parent_folder
-#     /argoverse
-#     /yolov5
-
-
-# download command/URL (optional)
-download: bash data/scripts/get_argoverse_hd.sh
-
-# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
-train: ../argoverse/Argoverse-1.1/images/train/  # 39384 images
-val: ../argoverse/Argoverse-1.1/images/val/  # 15062 iamges
-test: ../argoverse/Argoverse-1.1/images/test/  # Submit to: https://eval.ai/web/challenges/challenge-page/800/overview
-
-# number of classes
-nc: 8
-
-# class names
-names: [ 'person',  'bicycle',  'car',  'motorcycle',  'bus',  'truck',  'traffic_light',  'stop_sign' ]
diff --git a/data/coco.yaml b/data/coco.yaml
index f818a49ff0fa..d64dfc7fed76 100644
--- a/data/coco.yaml
+++ b/data/coco.yaml
@@ -1,35 +1,116 @@
-# COCO 2017 dataset http://cocodataset.org
-# Train command: python train.py --data coco.yaml
-# Default dataset location is next to YOLOv5:
-#   /parent_folder
-#     /coco
-#     /yolov5
-
-
-# download command/URL (optional)
-download: bash data/scripts/get_coco.sh
-
-# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
-train: ../coco/train2017.txt  # 118287 images
-val: ../coco/val2017.txt  # 5000 images
-test: ../coco/test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
-
-# number of classes
-nc: 80
-
-# class names
-names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
-         'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
-         'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
-         'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
-         'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
-         'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
-         'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
-         'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
-         'hair drier', 'toothbrush' ]
-
-# Print classes
-# with open('data/coco.yaml') as f:
-#   d = yaml.safe_load(f)  # dict
-#   for i, x in enumerate(d['names']):
-#     print(i, x)
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# COCO 2017 dataset http://cocodataset.org by Microsoft
+# Example usage: python train.py --data coco.yaml
+# parent
+# ├── yolov5
+# └── datasets
+#     └── coco  ← downloads here (20.1 GB)
+
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/coco  # dataset root dir
+train: train2017.txt  # train images (relative to 'path') 118287 images
+val: val2017.txt  # val images (relative to 'path') 5000 images
+test: test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
+
+# Classes
+names:
+  0: person
+  1: bicycle
+  2: car
+  3: motorcycle
+  4: airplane
+  5: bus
+  6: train
+  7: truck
+  8: boat
+  9: traffic light
+  10: fire hydrant
+  11: stop sign
+  12: parking meter
+  13: bench
+  14: bird
+  15: cat
+  16: dog
+  17: horse
+  18: sheep
+  19: cow
+  20: elephant
+  21: bear
+  22: zebra
+  23: giraffe
+  24: backpack
+  25: umbrella
+  26: handbag
+  27: tie
+  28: suitcase
+  29: frisbee
+  30: skis
+  31: snowboard
+  32: sports ball
+  33: kite
+  34: baseball bat
+  35: baseball glove
+  36: skateboard
+  37: surfboard
+  38: tennis racket
+  39: bottle
+  40: wine glass
+  41: cup
+  42: fork
+  43: knife
+  44: spoon
+  45: bowl
+  46: banana
+  47: apple
+  48: sandwich
+  49: orange
+  50: broccoli
+  51: carrot
+  52: hot dog
+  53: pizza
+  54: donut
+  55: cake
+  56: chair
+  57: couch
+  58: potted plant
+  59: bed
+  60: dining table
+  61: toilet
+  62: tv
+  63: laptop
+  64: mouse
+  65: remote
+  66: keyboard
+  67: cell phone
+  68: microwave
+  69: oven
+  70: toaster
+  71: sink
+  72: refrigerator
+  73: book
+  74: clock
+  75: vase
+  76: scissors
+  77: teddy bear
+  78: hair drier
+  79: toothbrush
+
+
+# Download script/URL (optional)
+download: |
+  from utils.general import download, Path
+
+
+  # Download labels
+  segments = False  # segment or box labels
+  dir = Path(yaml['path'])  # dataset root dir
+  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
+  urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')]  # labels
+  download(urls, dir=dir.parent)
+
+  # Download data
+  urls = ['http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
+          'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
+          'http://images.cocodataset.org/zips/test2017.zip']  # 7G, 41k images (optional)
+  download(urls, dir=dir / 'images', threads=3)
diff --git a/data/coco128-seg.yaml b/data/coco128-seg.yaml
new file mode 100644
index 000000000000..5e81910cc456
--- /dev/null
+++ b/data/coco128-seg.yaml
@@ -0,0 +1,101 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# COCO128-seg dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
+# Example usage: python train.py --data coco128.yaml
+# parent
+# ├── yolov5
+# └── datasets
+#     └── coco128-seg  ← downloads here (7 MB)
+
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/coco128-seg  # dataset root dir
+train: images/train2017  # train images (relative to 'path') 128 images
+val: images/train2017  # val images (relative to 'path') 128 images
+test:  # test images (optional)
+
+# Classes
+names:
+  0: person
+  1: bicycle
+  2: car
+  3: motorcycle
+  4: airplane
+  5: bus
+  6: train
+  7: truck
+  8: boat
+  9: traffic light
+  10: fire hydrant
+  11: stop sign
+  12: parking meter
+  13: bench
+  14: bird
+  15: cat
+  16: dog
+  17: horse
+  18: sheep
+  19: cow
+  20: elephant
+  21: bear
+  22: zebra
+  23: giraffe
+  24: backpack
+  25: umbrella
+  26: handbag
+  27: tie
+  28: suitcase
+  29: frisbee
+  30: skis
+  31: snowboard
+  32: sports ball
+  33: kite
+  34: baseball bat
+  35: baseball glove
+  36: skateboard
+  37: surfboard
+  38: tennis racket
+  39: bottle
+  40: wine glass
+  41: cup
+  42: fork
+  43: knife
+  44: spoon
+  45: bowl
+  46: banana
+  47: apple
+  48: sandwich
+  49: orange
+  50: broccoli
+  51: carrot
+  52: hot dog
+  53: pizza
+  54: donut
+  55: cake
+  56: chair
+  57: couch
+  58: potted plant
+  59: bed
+  60: dining table
+  61: toilet
+  62: tv
+  63: laptop
+  64: mouse
+  65: remote
+  66: keyboard
+  67: cell phone
+  68: microwave
+  69: oven
+  70: toaster
+  71: sink
+  72: refrigerator
+  73: book
+  74: clock
+  75: vase
+  76: scissors
+  77: teddy bear
+  78: hair drier
+  79: toothbrush
+
+
+# Download script/URL (optional)
+download: https://ultralytics.com/assets/coco128-seg.zip
diff --git a/data/coco128.yaml b/data/coco128.yaml
index 83fbc29d3404..12556736a571 100644
--- a/data/coco128.yaml
+++ b/data/coco128.yaml
@@ -1,28 +1,101 @@
-# COCO 2017 dataset http://cocodataset.org - first 128 training images
-# Train command: python train.py --data coco128.yaml
-# Default dataset location is next to YOLOv5:
-#   /parent_folder
-#     /coco128
-#     /yolov5
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
+# Example usage: python train.py --data coco128.yaml
+# parent
+# ├── yolov5
+# └── datasets
+#     └── coco128  ← downloads here (7 MB)
 
 
-# download command/URL (optional)
-download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/coco128  # dataset root dir
+train: images/train2017  # train images (relative to 'path') 128 images
+val: images/train2017  # val images (relative to 'path') 128 images
+test:  # test images (optional)
 
-# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
-train: ../coco128/images/train2017/  # 128 images
-val: ../coco128/images/train2017/  # 128 images
+# Classes
+names:
+  0: person
+  1: bicycle
+  2: car
+  3: motorcycle
+  4: airplane
+  5: bus
+  6: train
+  7: truck
+  8: boat
+  9: traffic light
+  10: fire hydrant
+  11: stop sign
+  12: parking meter
+  13: bench
+  14: bird
+  15: cat
+  16: dog
+  17: horse
+  18: sheep
+  19: cow
+  20: elephant
+  21: bear
+  22: zebra
+  23: giraffe
+  24: backpack
+  25: umbrella
+  26: handbag
+  27: tie
+  28: suitcase
+  29: frisbee
+  30: skis
+  31: snowboard
+  32: sports ball
+  33: kite
+  34: baseball bat
+  35: baseball glove
+  36: skateboard
+  37: surfboard
+  38: tennis racket
+  39: bottle
+  40: wine glass
+  41: cup
+  42: fork
+  43: knife
+  44: spoon
+  45: bowl
+  46: banana
+  47: apple
+  48: sandwich
+  49: orange
+  50: broccoli
+  51: carrot
+  52: hot dog
+  53: pizza
+  54: donut
+  55: cake
+  56: chair
+  57: couch
+  58: potted plant
+  59: bed
+  60: dining table
+  61: toilet
+  62: tv
+  63: laptop
+  64: mouse
+  65: remote
+  66: keyboard
+  67: cell phone
+  68: microwave
+  69: oven
+  70: toaster
+  71: sink
+  72: refrigerator
+  73: book
+  74: clock
+  75: vase
+  76: scissors
+  77: teddy bear
+  78: hair drier
+  79: toothbrush
 
-# number of classes
-nc: 80
 
-# class names
-names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
-         'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
-         'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
-         'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
-         'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
-         'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
-         'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
-         'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
-         'hair drier', 'toothbrush' ]
+# Download script/URL (optional)
+download: https://ultralytics.com/assets/coco128.zip
diff --git a/data/hyp.finetune.yaml b/data/hyp.finetune.yaml
deleted file mode 100644
index 043ca2eb71f6..000000000000
--- a/data/hyp.finetune.yaml
+++ /dev/null
@@ -1,32 +0,0 @@
-# Hyperparameters for VOC finetuning
-# python train.py --batch 64 --weights yolov5m.pt --data voc.yaml --img 512 --epochs 50
-# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
-
-lr0: 0.0032
-lrf: 0.12
-momentum: 0.843
-weight_decay: 0.00036
-warmup_epochs: 2.0
-warmup_momentum: 0.5
-warmup_bias_lr: 0.05
-box: 0.0296
-cls: 0.243
-cls_pw: 0.631
-obj: 0.301
-obj_pw: 0.911
-iou_t: 0.2
-anchor_t: 2.91
-# anchors: 3.63
-fl_gamma: 0.0
-hsv_h: 0.0138
-hsv_s: 0.664
-hsv_v: 0.464
-degrees: 0.373
-translate: 0.245
-scale: 0.898
-shear: 0.602
-perspective: 0.0
-flipud: 0.00856
-fliplr: 0.5
-mosaic: 1.0
-mixup: 0.243
diff --git a/data/hyps/hyp.Objects365.yaml b/data/hyps/hyp.Objects365.yaml
new file mode 100644
index 000000000000..74971740f7c7
--- /dev/null
+++ b/data/hyps/hyp.Objects365.yaml
@@ -0,0 +1,34 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# Hyperparameters for Objects365 training
+# python train.py --weights yolov5m.pt --data Objects365.yaml --evolve
+# See Hyperparameter Evolution tutorial for details https://github.com/ultralytics/yolov5#tutorials
+
+lr0: 0.00258
+lrf: 0.17
+momentum: 0.779
+weight_decay: 0.00058
+warmup_epochs: 1.33
+warmup_momentum: 0.86
+warmup_bias_lr: 0.0711
+box: 0.0539
+cls: 0.299
+cls_pw: 0.825
+obj: 0.632
+obj_pw: 1.0
+iou_t: 0.2
+anchor_t: 3.44
+anchors: 3.2
+fl_gamma: 0.0
+hsv_h: 0.0188
+hsv_s: 0.704
+hsv_v: 0.36
+degrees: 0.0
+translate: 0.0902
+scale: 0.491
+shear: 0.0
+perspective: 0.0
+flipud: 0.0
+fliplr: 0.5
+mosaic: 1.0
+mixup: 0.0
+copy_paste: 0.0
diff --git a/data/hyps/hyp.VOC.yaml b/data/hyps/hyp.VOC.yaml
new file mode 100644
index 000000000000..0aa4e7d9f8f5
--- /dev/null
+++ b/data/hyps/hyp.VOC.yaml
@@ -0,0 +1,40 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# Hyperparameters for VOC training
+# python train.py --batch 128 --weights yolov5m6.pt --data VOC.yaml --epochs 50 --img 512 --hyp hyp.scratch-med.yaml --evolve
+# See Hyperparameter Evolution tutorial for details https://github.com/ultralytics/yolov5#tutorials
+
+# YOLOv5 Hyperparameter Evolution Results
+# Best generation: 467
+# Last generation: 996
+#    metrics/precision,       metrics/recall,      metrics/mAP_0.5, metrics/mAP_0.5:0.95,         val/box_loss,         val/obj_loss,         val/cls_loss
+#              0.87729,              0.85125,              0.91286,              0.72664,            0.0076739,            0.0042529,            0.0013865
+
+lr0: 0.00334
+lrf: 0.15135
+momentum: 0.74832
+weight_decay: 0.00025
+warmup_epochs: 3.3835
+warmup_momentum: 0.59462
+warmup_bias_lr: 0.18657
+box: 0.02
+cls: 0.21638
+cls_pw: 0.5
+obj: 0.51728
+obj_pw: 0.67198
+iou_t: 0.2
+anchor_t: 3.3744
+fl_gamma: 0.0
+hsv_h: 0.01041
+hsv_s: 0.54703
+hsv_v: 0.27739
+degrees: 0.0
+translate: 0.04591
+scale: 0.75544
+shear: 0.0
+perspective: 0.0
+flipud: 0.0
+fliplr: 0.5
+mosaic: 0.85834
+mixup: 0.04266
+copy_paste: 0.0
+anchors: 3.412
diff --git a/data/hyps/hyp.no-augmentation.yaml b/data/hyps/hyp.no-augmentation.yaml
new file mode 100644
index 000000000000..8fbd5b262afa
--- /dev/null
+++ b/data/hyps/hyp.no-augmentation.yaml
@@ -0,0 +1,35 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# Hyperparameters when using Albumentations frameworks
+# python train.py --hyp hyp.no-augmentation.yaml
+# See https://github.com/ultralytics/yolov5/pull/3882 for YOLOv5 + Albumentations Usage examples
+
+lr0: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
+lrf: 0.1  # final OneCycleLR learning rate (lr0 * lrf)
+momentum: 0.937  # SGD momentum/Adam beta1
+weight_decay: 0.0005  # optimizer weight decay 5e-4
+warmup_epochs: 3.0  # warmup epochs (fractions ok)
+warmup_momentum: 0.8  # warmup initial momentum
+warmup_bias_lr: 0.1  # warmup initial bias lr
+box: 0.05  # box loss gain
+cls: 0.3  # cls loss gain
+cls_pw: 1.0  # cls BCELoss positive_weight
+obj: 0.7  # obj loss gain (scale with pixels)
+obj_pw: 1.0  # obj BCELoss positive_weight
+iou_t: 0.20  # IoU training threshold
+anchor_t: 4.0  # anchor-multiple threshold
+# anchors: 3  # anchors per output layer (0 to ignore)
+# this parameters are all zero since we want to use albumentation framework
+fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
+hsv_h: 0  # image HSV-Hue augmentation (fraction)
+hsv_s: 00  # image HSV-Saturation augmentation (fraction)
+hsv_v: 0  # image HSV-Value augmentation (fraction)
+degrees: 0.0  # image rotation (+/- deg)
+translate: 0  # image translation (+/- fraction)
+scale: 0  # image scale (+/- gain)
+shear: 0  # image shear (+/- deg)
+perspective: 0.0  # image perspective (+/- fraction), range 0-0.001
+flipud: 0.0  # image flip up-down (probability)
+fliplr: 0.0  # image flip left-right (probability)
+mosaic: 0.0  # image mosaic (probability)
+mixup: 0.0  # image mixup (probability)
+copy_paste: 0.0  # segment copy-paste (probability)
diff --git a/data/hyps/hyp.scratch-high.yaml b/data/hyps/hyp.scratch-high.yaml
new file mode 100644
index 000000000000..123cc8407413
--- /dev/null
+++ b/data/hyps/hyp.scratch-high.yaml
@@ -0,0 +1,34 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# Hyperparameters for high-augmentation COCO training from scratch
+# python train.py --batch 32 --cfg yolov5m6.yaml --weights '' --data coco.yaml --img 1280 --epochs 300
+# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
+
+lr0: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
+lrf: 0.1  # final OneCycleLR learning rate (lr0 * lrf)
+momentum: 0.937  # SGD momentum/Adam beta1
+weight_decay: 0.0005  # optimizer weight decay 5e-4
+warmup_epochs: 3.0  # warmup epochs (fractions ok)
+warmup_momentum: 0.8  # warmup initial momentum
+warmup_bias_lr: 0.1  # warmup initial bias lr
+box: 0.05  # box loss gain
+cls: 0.3  # cls loss gain
+cls_pw: 1.0  # cls BCELoss positive_weight
+obj: 0.7  # obj loss gain (scale with pixels)
+obj_pw: 1.0  # obj BCELoss positive_weight
+iou_t: 0.20  # IoU training threshold
+anchor_t: 4.0  # anchor-multiple threshold
+# anchors: 3  # anchors per output layer (0 to ignore)
+fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
+hsv_h: 0.015  # image HSV-Hue augmentation (fraction)
+hsv_s: 0.7  # image HSV-Saturation augmentation (fraction)
+hsv_v: 0.4  # image HSV-Value augmentation (fraction)
+degrees: 0.0  # image rotation (+/- deg)
+translate: 0.1  # image translation (+/- fraction)
+scale: 0.9  # image scale (+/- gain)
+shear: 0.0  # image shear (+/- deg)
+perspective: 0.0  # image perspective (+/- fraction), range 0-0.001
+flipud: 0.0  # image flip up-down (probability)
+fliplr: 0.5  # image flip left-right (probability)
+mosaic: 1.0  # image mosaic (probability)
+mixup: 0.1  # image mixup (probability)
+copy_paste: 0.1  # segment copy-paste (probability)
diff --git a/data/hyp.scratch.yaml b/data/hyps/hyp.scratch-low.yaml
similarity index 80%
rename from data/hyp.scratch.yaml
rename to data/hyps/hyp.scratch-low.yaml
index f3b287413eee..b9ef1d55a3b6 100644
--- a/data/hyp.scratch.yaml
+++ b/data/hyps/hyp.scratch-low.yaml
@@ -1,9 +1,10 @@
-# Hyperparameters for COCO training from scratch
-# python train.py --batch 40 --cfg yolov5m.yaml --weights '' --data coco.yaml --img 640 --epochs 300
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# Hyperparameters for low-augmentation COCO training from scratch
+# python train.py --batch 64 --cfg yolov5n6.yaml --weights '' --data coco.yaml --img 640 --epochs 300 --linear
 # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
 
 lr0: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
-lrf: 0.2  # final OneCycleLR learning rate (lr0 * lrf)
+lrf: 0.01  # final OneCycleLR learning rate (lr0 * lrf)
 momentum: 0.937  # SGD momentum/Adam beta1
 weight_decay: 0.0005  # optimizer weight decay 5e-4
 warmup_epochs: 3.0  # warmup epochs (fractions ok)
@@ -30,3 +31,4 @@ flipud: 0.0  # image flip up-down (probability)
 fliplr: 0.5  # image flip left-right (probability)
 mosaic: 1.0  # image mosaic (probability)
 mixup: 0.0  # image mixup (probability)
+copy_paste: 0.0  # segment copy-paste (probability)
diff --git a/data/hyps/hyp.scratch-med.yaml b/data/hyps/hyp.scratch-med.yaml
new file mode 100644
index 000000000000..d6867d7557ba
--- /dev/null
+++ b/data/hyps/hyp.scratch-med.yaml
@@ -0,0 +1,34 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# Hyperparameters for medium-augmentation COCO training from scratch
+# python train.py --batch 32 --cfg yolov5m6.yaml --weights '' --data coco.yaml --img 1280 --epochs 300
+# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
+
+lr0: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
+lrf: 0.1  # final OneCycleLR learning rate (lr0 * lrf)
+momentum: 0.937  # SGD momentum/Adam beta1
+weight_decay: 0.0005  # optimizer weight decay 5e-4
+warmup_epochs: 3.0  # warmup epochs (fractions ok)
+warmup_momentum: 0.8  # warmup initial momentum
+warmup_bias_lr: 0.1  # warmup initial bias lr
+box: 0.05  # box loss gain
+cls: 0.3  # cls loss gain
+cls_pw: 1.0  # cls BCELoss positive_weight
+obj: 0.7  # obj loss gain (scale with pixels)
+obj_pw: 1.0  # obj BCELoss positive_weight
+iou_t: 0.20  # IoU training threshold
+anchor_t: 4.0  # anchor-multiple threshold
+# anchors: 3  # anchors per output layer (0 to ignore)
+fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
+hsv_h: 0.015  # image HSV-Hue augmentation (fraction)
+hsv_s: 0.7  # image HSV-Saturation augmentation (fraction)
+hsv_v: 0.4  # image HSV-Value augmentation (fraction)
+degrees: 0.0  # image rotation (+/- deg)
+translate: 0.1  # image translation (+/- fraction)
+scale: 0.9  # image scale (+/- gain)
+shear: 0.0  # image shear (+/- deg)
+perspective: 0.0  # image perspective (+/- fraction), range 0-0.001
+flipud: 0.0  # image flip up-down (probability)
+fliplr: 0.5  # image flip left-right (probability)
+mosaic: 1.0  # image mosaic (probability)
+mixup: 0.1  # image mixup (probability)
+copy_paste: 0.0  # segment copy-paste (probability)
diff --git a/data/scripts/download_weights.sh b/data/scripts/download_weights.sh
new file mode 100755
index 000000000000..31e0a15569f2
--- /dev/null
+++ b/data/scripts/download_weights.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# Download latest models from https://github.com/ultralytics/yolov5/releases
+# Example usage: bash data/scripts/download_weights.sh
+# parent
+# └── yolov5
+#     ├── yolov5s.pt  ← downloads here
+#     ├── yolov5m.pt
+#     └── ...
+
+python - <<EOF
+from utils.downloads import attempt_download
+
+p5 = list('nsmlx')  # P5 models
+p6 = [f'{x}6' for x in p5]  # P6 models
+cls = [f'{x}-cls' for x in p5]  # classification models
+seg = [f'{x}-seg' for x in p5]  # classification models
+
+for x in p5 + p6 + cls + seg:
+    attempt_download(f'weights/yolov5{x}.pt')
+
+EOF
diff --git a/data/scripts/get_argoverse_hd.sh b/data/scripts/get_argoverse_hd.sh
deleted file mode 100644
index 18131a6764d6..000000000000
--- a/data/scripts/get_argoverse_hd.sh
+++ /dev/null
@@ -1,62 +0,0 @@
-#!/bin/bash
-# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
-# Download command: bash data/scripts/get_argoverse_hd.sh
-# Train command: python train.py --data argoverse_hd.yaml
-# Default dataset location is next to YOLOv5:
-#   /parent_folder
-#     /argoverse
-#     /yolov5
-
-# Download/unzip images
-d='../argoverse/' # unzip directory
-mkdir $d
-url=https://argoverse-hd.s3.us-east-2.amazonaws.com/
-f=Argoverse-HD-Full.zip
-curl -L $url$f -o $f && unzip -q $f -d $d && rm $f &# download, unzip, remove in background
-wait                                              # finish background tasks
-
-cd ../argoverse/Argoverse-1.1/
-ln -s tracking images
-
-cd ../Argoverse-HD/annotations/
-
-python3 - "$@" <<END
-import json
-from pathlib import Path
-
-annotation_files = ["train.json", "val.json"]
-print("Converting annotations to YOLOv5 format...")
-
-for val in annotation_files:
-    a = json.load(open(val, "rb"))
-
-    label_dict = {}
-    for annot in a['annotations']:
-        img_id = annot['image_id']
-        img_name = a['images'][img_id]['name']
-        img_label_name = img_name[:-3] + "txt"
-
-        obj_class = annot['category_id']
-        x_center, y_center, width, height = annot['bbox']
-        x_center = (x_center + width / 2) / 1920.  # offset and scale
-        y_center = (y_center + height / 2) / 1200.  # offset and scale
-        width /= 1920.  # scale
-        height /= 1200.  # scale
-
-        img_dir = "./labels/" + a['seq_dirs'][a['images'][annot['image_id']]['sid']]
-
-        Path(img_dir).mkdir(parents=True, exist_ok=True)
-
-        if img_dir + "/" + img_label_name not in label_dict:
-            label_dict[img_dir + "/" + img_label_name] = []
-
-        label_dict[img_dir + "/" + img_label_name].append(f"{obj_class} {x_center} {y_center} {width} {height}\n")
-
-    for filename in label_dict:
-        with open(filename, "w") as file:
-            for string in label_dict[filename]:
-                file.write(string)
-
-END
-
-mv ./labels ../../Argoverse-1.1/
diff --git a/data/scripts/get_coco.sh b/data/scripts/get_coco.sh
index caae37504780..0d388b0a12a8 100755
--- a/data/scripts/get_coco.sh
+++ b/data/scripts/get_coco.sh
@@ -1,27 +1,56 @@
 #!/bin/bash
-# COCO 2017 dataset http://cocodataset.org
-# Download command: bash data/scripts/get_coco.sh
-# Train command: python train.py --data coco.yaml
-# Default dataset location is next to YOLOv5:
-#   /parent_folder
-#     /coco
-#     /yolov5
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# Download COCO 2017 dataset http://cocodataset.org
+# Example usage: bash data/scripts/get_coco.sh
+# parent
+# ├── yolov5
+# └── datasets
+#     └── coco  ← downloads here
+
+# Arguments (optional) Usage: bash data/scripts/get_coco.sh --train --val --test --segments
+if [ "$#" -gt 0 ]; then
+  for opt in "$@"; do
+    case "${opt}" in
+    --train) train=true ;;
+    --val) val=true ;;
+    --test) test=true ;;
+    --segments) segments=true ;;
+    esac
+  done
+else
+  train=true
+  val=true
+  test=false
+  segments=false
+fi
 
 # Download/unzip labels
-d='../' # unzip directory
+d='../datasets' # unzip directory
 url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
-f='coco2017labels.zip' # or 'coco2017labels-segments.zip', 68 MB
+if [ "$segments" == "true" ]; then
+  f='coco2017labels-segments.zip' # 168 MB
+else
+  f='coco2017labels.zip' # 46 MB
+fi
 echo 'Downloading' $url$f ' ...'
-curl -L $url$f -o $f && unzip -q $f -d $d && rm $f & # download, unzip, remove in background
+curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
 
 # Download/unzip images
-d='../coco/images' # unzip directory
+d='../datasets/coco/images' # unzip directory
 url=http://images.cocodataset.org/zips/
-f1='train2017.zip' # 19G, 118k images
-f2='val2017.zip'   # 1G, 5k images
-f3='test2017.zip'  # 7G, 41k images (optional)
-for f in $f1 $f2; do
+if [ "$train" == "true" ]; then
+  f='train2017.zip' # 19G, 118k images
+  echo 'Downloading' $url$f '...'
+  curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
+fi
+if [ "$val" == "true" ]; then
+  f='val2017.zip' # 1G, 5k images
+  echo 'Downloading' $url$f '...'
+  curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
+fi
+if [ "$test" == "true" ]; then
+  f='test2017.zip' # 7G, 41k images (optional)
   echo 'Downloading' $url$f '...'
-  curl -L $url$f -o $f && unzip -q $f -d $d && rm $f & # download, unzip, remove in background
-done
+  curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
+fi
 wait # finish background tasks
diff --git a/data/scripts/get_coco128.sh b/data/scripts/get_coco128.sh
new file mode 100755
index 000000000000..e7ddce89b115
--- /dev/null
+++ b/data/scripts/get_coco128.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# Download COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017)
+# Example usage: bash data/scripts/get_coco128.sh
+# parent
+# ├── yolov5
+# └── datasets
+#     └── coco128  ← downloads here
+
+# Download/unzip images and labels
+d='../datasets' # unzip directory
+url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
+f='coco128.zip' # or 'coco128-segments.zip', 68 MB
+echo 'Downloading' $url$f ' ...'
+curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
+
+wait # finish background tasks
diff --git a/data/scripts/get_imagenet.sh b/data/scripts/get_imagenet.sh
new file mode 100755
index 000000000000..6026d502e8f3
--- /dev/null
+++ b/data/scripts/get_imagenet.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# Download ILSVRC2012 ImageNet dataset https://image-net.org
+# Example usage: bash data/scripts/get_imagenet.sh
+# parent
+# ├── yolov5
+# └── datasets
+#     └── imagenet  ← downloads here
+
+# Arguments (optional) Usage: bash data/scripts/get_imagenet.sh --train --val
+if [ "$#" -gt 0 ]; then
+  for opt in "$@"; do
+    case "${opt}" in
+    --train) train=true ;;
+    --val) val=true ;;
+    esac
+  done
+else
+  train=true
+  val=true
+fi
+
+# Make dir
+d='../datasets/imagenet' # unzip directory
+mkdir -p $d && cd $d
+
+# Download/unzip train
+if [ "$train" == "true" ]; then
+  wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_train.tar # download 138G, 1281167 images
+  mkdir train && mv ILSVRC2012_img_train.tar train/ && cd train
+  tar -xf ILSVRC2012_img_train.tar && rm -f ILSVRC2012_img_train.tar
+  find . -name "*.tar" | while read NAME; do
+    mkdir -p "${NAME%.tar}"
+    tar -xf "${NAME}" -C "${NAME%.tar}"
+    rm -f "${NAME}"
+  done
+  cd ..
+fi
+
+# Download/unzip val
+if [ "$val" == "true" ]; then
+  wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar # download 6.3G, 50000 images
+  mkdir val && mv ILSVRC2012_img_val.tar val/ && cd val && tar -xf ILSVRC2012_img_val.tar
+  wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash # move into subdirs
+fi
+
+# Delete corrupted image (optional: PNG under JPEG name that may cause dataloaders to fail)
+# rm train/n04266014/n04266014_10835.JPEG
+
+# TFRecords (optional)
+# wget https://raw.githubusercontent.com/tensorflow/models/master/research/slim/datasets/imagenet_lsvrc_2015_synsets.txt
diff --git a/data/scripts/get_voc.sh b/data/scripts/get_voc.sh
deleted file mode 100644
index 4c04aaa95a29..000000000000
--- a/data/scripts/get_voc.sh
+++ /dev/null
@@ -1,116 +0,0 @@
-#!/bin/bash
-# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
-# Download command: bash data/scripts/get_voc.sh
-# Train command: python train.py --data voc.yaml
-# Default dataset location is next to YOLOv5:
-#   /parent_folder
-#     /VOC
-#     /yolov5
-
-start=$(date +%s)
-mkdir -p ../tmp
-cd ../tmp/
-
-# Download/unzip images and labels
-d='.' # unzip directory
-url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
-f1=VOCtrainval_06-Nov-2007.zip # 446MB, 5012 images
-f2=VOCtest_06-Nov-2007.zip     # 438MB, 4953 images
-f3=VOCtrainval_11-May-2012.zip # 1.95GB, 17126 images
-for f in $f3 $f2 $f1; do
-  echo 'Downloading' $url$f '...' 
-  curl -L $url$f -o $f && unzip -q $f -d $d && rm $f & # download, unzip, remove in background
-done
-wait # finish background tasks
-
-end=$(date +%s)
-runtime=$((end - start))
-echo "Completed in" $runtime "seconds"
-
-echo "Splitting dataset..."
-python3 - "$@" <<END
-import os
-import xml.etree.ElementTree as ET
-from os import getcwd
-
-sets = [('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
-
-classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
-           "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
-
-
-def convert_box(size, box):
-    dw = 1. / (size[0])
-    dh = 1. / (size[1])
-    x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
-    return x * dw, y * dh, w * dw, h * dh
-
-
-def convert_annotation(year, image_id):
-    in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id))
-    out_file = open('VOCdevkit/VOC%s/labels/%s.txt' % (year, image_id), 'w')
-    tree = ET.parse(in_file)
-    root = tree.getroot()
-    size = root.find('size')
-    w = int(size.find('width').text)
-    h = int(size.find('height').text)
-
-    for obj in root.iter('object'):
-        difficult = obj.find('difficult').text
-        cls = obj.find('name').text
-        if cls not in classes or int(difficult) == 1:
-            continue
-        cls_id = classes.index(cls)
-        xmlbox = obj.find('bndbox')
-        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text),
-             float(xmlbox.find('ymax').text))
-        bb = convert_box((w, h), b)
-        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
-
-
-cwd = getcwd()
-for year, image_set in sets:
-    if not os.path.exists('VOCdevkit/VOC%s/labels/' % year):
-        os.makedirs('VOCdevkit/VOC%s/labels/' % year)
-    image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set)).read().strip().split()
-    list_file = open('%s_%s.txt' % (year, image_set), 'w')
-    for image_id in image_ids:
-        list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n' % (cwd, year, image_id))
-        convert_annotation(year, image_id)
-    list_file.close()
-END
-
-cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt >train.txt
-cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt >train.all.txt
-
-mkdir ../VOC ../VOC/images ../VOC/images/train ../VOC/images/val
-mkdir ../VOC/labels ../VOC/labels/train ../VOC/labels/val
-
-python3 - "$@" <<END
-import os
-
-print(os.path.exists('../tmp/train.txt'))
-with open('../tmp/train.txt', 'r') as f:
-    for line in f.readlines():
-        line = "/".join(line.split('/')[-5:]).strip()
-        if os.path.exists("../" + line):
-            os.system("cp ../" + line + " ../VOC/images/train")
-
-        line = line.replace('JPEGImages', 'labels').replace('jpg', 'txt')
-        if os.path.exists("../" + line):
-            os.system("cp ../" + line + " ../VOC/labels/train")
-
-print(os.path.exists('../tmp/2007_test.txt'))
-with open('../tmp/2007_test.txt', 'r') as f:
-    for line in f.readlines():
-        line = "/".join(line.split('/')[-5:]).strip()
-        if os.path.exists("../" + line):
-            os.system("cp ../" + line + " ../VOC/images/val")
-
-        line = line.replace('JPEGImages', 'labels').replace('jpg', 'txt')
-        if os.path.exists("../" + line):
-            os.system("cp ../" + line + " ../VOC/labels/val")
-END
-
-rm -rf ../tmp # remove temporary directory
-echo "VOC download done."
diff --git a/data/voc.yaml b/data/voc.yaml
deleted file mode 100644
index ca293c4f091f..000000000000
--- a/data/voc.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
-# Train command: python train.py --data voc.yaml
-# Default dataset location is next to YOLOv5:
-#   /parent_folder
-#     /VOC
-#     /yolov5
-
-
-# download command/URL (optional)
-download: bash data/scripts/get_voc.sh
-
-# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
-train: ../VOC/images/train/  # 16551 images
-val: ../VOC/images/val/  # 4952 images
-
-# number of classes
-nc: 20
-
-# class names
-names: [ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
-         'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ]
diff --git a/data/xView.yaml b/data/xView.yaml
new file mode 100644
index 000000000000..770ab7870449
--- /dev/null
+++ b/data/xView.yaml
@@ -0,0 +1,153 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# DIUx xView 2018 Challenge https://challenge.xviewdataset.org by U.S. National Geospatial-Intelligence Agency (NGA)
+# --------  DOWNLOAD DATA MANUALLY and jar xf val_images.zip to 'datasets/xView' before running train command!  --------
+# Example usage: python train.py --data xView.yaml
+# parent
+# ├── yolov5
+# └── datasets
+#     └── xView  ← downloads here (20.7 GB)
+
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/xView  # dataset root dir
+train: images/autosplit_train.txt  # train images (relative to 'path') 90% of 847 train images
+val: images/autosplit_val.txt  # train images (relative to 'path') 10% of 847 train images
+
+# Classes
+names:
+  0: Fixed-wing Aircraft
+  1: Small Aircraft
+  2: Cargo Plane
+  3: Helicopter
+  4: Passenger Vehicle
+  5: Small Car
+  6: Bus
+  7: Pickup Truck
+  8: Utility Truck
+  9: Truck
+  10: Cargo Truck
+  11: Truck w/Box
+  12: Truck Tractor
+  13: Trailer
+  14: Truck w/Flatbed
+  15: Truck w/Liquid
+  16: Crane Truck
+  17: Railway Vehicle
+  18: Passenger Car
+  19: Cargo Car
+  20: Flat Car
+  21: Tank car
+  22: Locomotive
+  23: Maritime Vessel
+  24: Motorboat
+  25: Sailboat
+  26: Tugboat
+  27: Barge
+  28: Fishing Vessel
+  29: Ferry
+  30: Yacht
+  31: Container Ship
+  32: Oil Tanker
+  33: Engineering Vehicle
+  34: Tower crane
+  35: Container Crane
+  36: Reach Stacker
+  37: Straddle Carrier
+  38: Mobile Crane
+  39: Dump Truck
+  40: Haul Truck
+  41: Scraper/Tractor
+  42: Front loader/Bulldozer
+  43: Excavator
+  44: Cement Mixer
+  45: Ground Grader
+  46: Hut/Tent
+  47: Shed
+  48: Building
+  49: Aircraft Hangar
+  50: Damaged Building
+  51: Facility
+  52: Construction Site
+  53: Vehicle Lot
+  54: Helipad
+  55: Storage Tank
+  56: Shipping container lot
+  57: Shipping Container
+  58: Pylon
+  59: Tower
+
+
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
+download: |
+  import json
+  import os
+  from pathlib import Path
+
+  import numpy as np
+  from PIL import Image
+  from tqdm import tqdm
+
+  from utils.dataloaders import autosplit
+  from utils.general import download, xyxy2xywhn
+
+
+  def convert_labels(fname=Path('xView/xView_train.geojson')):
+      # Convert xView geoJSON labels to YOLO format
+      path = fname.parent
+      with open(fname) as f:
+          print(f'Loading {fname}...')
+          data = json.load(f)
+
+      # Make dirs
+      labels = Path(path / 'labels' / 'train')
+      os.system(f'rm -rf {labels}')
+      labels.mkdir(parents=True, exist_ok=True)
+
+      # xView classes 11-94 to 0-59
+      xview_class2index = [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, -1, 9, 10, 11,
+                           12, 13, 14, 15, -1, -1, 16, 17, 18, 19, 20, 21, 22, -1, 23, 24, 25, -1, 26, 27, -1, 28, -1,
+                           29, 30, 31, 32, 33, 34, 35, 36, 37, -1, 38, 39, 40, 41, 42, 43, 44, 45, -1, -1, -1, -1, 46,
+                           47, 48, 49, -1, 50, 51, -1, 52, -1, -1, -1, 53, 54, -1, 55, -1, -1, 56, -1, 57, -1, 58, 59]
+
+      shapes = {}
+      for feature in tqdm(data['features'], desc=f'Converting {fname}'):
+          p = feature['properties']
+          if p['bounds_imcoords']:
+              id = p['image_id']
+              file = path / 'train_images' / id
+              if file.exists():  # 1395.tif missing
+                  try:
+                      box = np.array([int(num) for num in p['bounds_imcoords'].split(",")])
+                      assert box.shape[0] == 4, f'incorrect box shape {box.shape[0]}'
+                      cls = p['type_id']
+                      cls = xview_class2index[int(cls)]  # xView class to 0-60
+                      assert 59 >= cls >= 0, f'incorrect class index {cls}'
+
+                      # Write YOLO label
+                      if id not in shapes:
+                          shapes[id] = Image.open(file).size
+                      box = xyxy2xywhn(box[None].astype(np.float), w=shapes[id][0], h=shapes[id][1], clip=True)
+                      with open((labels / id).with_suffix('.txt'), 'a') as f:
+                          f.write(f"{cls} {' '.join(f'{x:.6f}' for x in box[0])}\n")  # write label.txt
+                  except Exception as e:
+                      print(f'WARNING: skipping one label for {file}: {e}')
+
+
+  # Download manually from https://challenge.xviewdataset.org
+  dir = Path(yaml['path'])  # dataset root dir
+  # urls = ['https://d307kc0mrhucc3.cloudfront.net/train_labels.zip',  # train labels
+  #         'https://d307kc0mrhucc3.cloudfront.net/train_images.zip',  # 15G, 847 train images
+  #         'https://d307kc0mrhucc3.cloudfront.net/val_images.zip']  # 5G, 282 val images (no labels)
+  # download(urls, dir=dir, delete=False)
+
+  # Convert labels
+  convert_labels(dir / 'xView_train.geojson')
+
+  # Move images
+  images = Path(dir / 'images')
+  images.mkdir(parents=True, exist_ok=True)
+  Path(dir / 'train_images').rename(dir / 'images' / 'train')
+  Path(dir / 'val_images').rename(dir / 'images' / 'val')
+
+  # Split
+  autosplit(dir / 'images' / 'train')
diff --git a/detect.py b/detect.py
index f5e53d991504..3f32d7a50d6b 100644
--- a/detect.py
+++ b/detect.py
@@ -1,126 +1,186 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Run YOLOv5 detection inference on images, videos, directories, globs, YouTube, webcam, streams, etc.
+
+Usage - sources:
+    $ python detect.py --weights yolov5s.pt --source 0                               # webcam
+                                                     img.jpg                         # image
+                                                     vid.mp4                         # video
+                                                     screen                          # screenshot
+                                                     path/                           # directory
+                                                     list.txt                        # list of images
+                                                     list.streams                    # list of streams
+                                                     'path/*.jpg'                    # glob
+                                                     'https://youtu.be/Zgi9g1ksQHc'  # YouTube
+                                                     'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP stream
+
+Usage - formats:
+    $ python detect.py --weights yolov5s.pt                 # PyTorch
+                                 yolov5s.torchscript        # TorchScript
+                                 yolov5s.onnx               # ONNX Runtime or OpenCV DNN with --dnn
+                                 yolov5s_openvino_model     # OpenVINO
+                                 yolov5s.engine             # TensorRT
+                                 yolov5s.mlmodel            # CoreML (macOS-only)
+                                 yolov5s_saved_model        # TensorFlow SavedModel
+                                 yolov5s.pb                 # TensorFlow GraphDef
+                                 yolov5s.tflite             # TensorFlow Lite
+                                 yolov5s_edgetpu.tflite     # TensorFlow Edge TPU
+                                 yolov5s_paddle_model       # PaddlePaddle
+"""
+
 import argparse
-import time
+import os
+import platform
+import sys
 from pathlib import Path
 
-import cv2
 import torch
-import torch.backends.cudnn as cudnn
-from numpy import random
 
-from models.experimental import attempt_load
-from utils.datasets import LoadStreams, LoadImages
-from utils.general import check_img_size, check_requirements, check_imshow, non_max_suppression, apply_classifier, \
-    scale_coords, xyxy2xywh, strip_optimizer, set_logging, increment_path, save_one_box
-from utils.plots import plot_one_box
-from utils.torch_utils import select_device, load_classifier, time_synchronized
+FILE = Path(__file__).resolve()
+ROOT = FILE.parents[0]  # YOLOv5 root directory
+if str(ROOT) not in sys.path:
+    sys.path.append(str(ROOT))  # add ROOT to PATH
+ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
 
+from models.common import DetectMultiBackend
+from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams
+from utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
+                           increment_path, non_max_suppression, print_args, scale_boxes, strip_optimizer, xyxy2xywh)
+from utils.plots import Annotator, colors, save_one_box
+from utils.torch_utils import select_device, smart_inference_mode
 
-def detect(opt):
-    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
-    save_img = not opt.nosave and not source.endswith('.txt')  # save inference images
-    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
-        ('rtsp://', 'rtmp://', 'http://', 'https://'))
+
+@smart_inference_mode()
+def run(
+        weights=ROOT / 'yolov5s.pt',  # model path or triton URL
+        source=ROOT / 'data/images',  # file/dir/URL/glob/screen/0(webcam)
+        data=ROOT / 'data/coco128.yaml',  # dataset.yaml path
+        imgsz=(640, 640),  # inference size (height, width)
+        conf_thres=0.25,  # confidence threshold
+        iou_thres=0.45,  # NMS IOU threshold
+        max_det=1000,  # maximum detections per image
+        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
+        view_img=False,  # show results
+        save_txt=False,  # save results to *.txt
+        save_conf=False,  # save confidences in --save-txt labels
+        save_crop=False,  # save cropped prediction boxes
+        nosave=False,  # do not save images/videos
+        classes=None,  # filter by class: --class 0, or --class 0 2 3
+        agnostic_nms=False,  # class-agnostic NMS
+        augment=False,  # augmented inference
+        visualize=False,  # visualize features
+        update=False,  # update all models
+        project=ROOT / 'runs/detect',  # save results to project/name
+        name='exp',  # save results to project/name
+        exist_ok=False,  # existing project/name ok, do not increment
+        line_thickness=3,  # bounding box thickness (pixels)
+        hide_labels=False,  # hide labels
+        hide_conf=False,  # hide confidences
+        half=False,  # use FP16 half-precision inference
+        dnn=False,  # use OpenCV DNN for ONNX inference
+        vid_stride=1,  # video frame-rate stride
+):
+    source = str(source)
+    save_img = not nosave and not source.endswith('.txt')  # save inference images
+    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
+    is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
+    webcam = source.isnumeric() or source.endswith('.streams') or (is_url and not is_file)
+    screenshot = source.lower().startswith('screen')
+    if is_url and is_file:
+        source = check_file(source)  # download
 
     # Directories
-    save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)  # increment run
+    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
     (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir
 
-    # Initialize
-    set_logging()
-    device = select_device(opt.device)
-    half = device.type != 'cpu'  # half precision only supported on CUDA
-
     # Load model
-    model = attempt_load(weights, map_location=device)  # load FP32 model
-    stride = int(model.stride.max())  # model stride
-    imgsz = check_img_size(imgsz, s=stride)  # check img_size
-    if half:
-        model.half()  # to FP16
-
-    # Second-stage classifier
-    classify = False
-    if classify:
-        modelc = load_classifier(name='resnet101', n=2)  # initialize
-        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()
-
-    # Set Dataloader
-    vid_path, vid_writer = None, None
+    device = select_device(device)
+    model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
+    stride, names, pt = model.stride, model.names, model.pt
+    imgsz = check_img_size(imgsz, s=stride)  # check image size
+
+    # Dataloader
+    bs = 1  # batch_size
     if webcam:
-        view_img = check_imshow()
-        cudnn.benchmark = True  # set True to speed up constant image size inference
-        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
+        view_img = check_imshow(warn=True)
+        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
+        bs = len(dataset)
+    elif screenshot:
+        dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
     else:
-        dataset = LoadImages(source, img_size=imgsz, stride=stride)
-
-    # Get names and colors
-    names = model.module.names if hasattr(model, 'module') else model.names
-    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
+        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
+    vid_path, vid_writer = [None] * bs, [None] * bs
 
     # Run inference
-    if device.type != 'cpu':
-        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
-    t0 = time.time()
-    for path, img, im0s, vid_cap in dataset:
-        img = torch.from_numpy(img).to(device)
-        img = img.half() if half else img.float()  # uint8 to fp16/32
-        img /= 255.0  # 0 - 255 to 0.0 - 1.0
-        if img.ndimension() == 3:
-            img = img.unsqueeze(0)
+    model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz))  # warmup
+    seen, windows, dt = 0, [], (Profile(), Profile(), Profile())
+    for path, im, im0s, vid_cap, s in dataset:
+        with dt[0]:
+            im = torch.from_numpy(im).to(model.device)
+            im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
+            im /= 255  # 0 - 255 to 0.0 - 1.0
+            if len(im.shape) == 3:
+                im = im[None]  # expand for batch dim
 
         # Inference
-        t1 = time_synchronized()
-        pred = model(img, augment=opt.augment)[0]
+        with dt[1]:
+            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
+            pred = model(im, augment=augment, visualize=visualize)
 
-        # Apply NMS
-        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
-        t2 = time_synchronized()
+        # NMS
+        with dt[2]:
+            pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
 
-        # Apply Classifier
-        if classify:
-            pred = apply_classifier(pred, modelc, img, im0s)
+        # Second-stage classifier (optional)
+        # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)
 
-        # Process detections
-        for i, det in enumerate(pred):  # detections per image
+        # Process predictions
+        for i, det in enumerate(pred):  # per image
+            seen += 1
             if webcam:  # batch_size >= 1
-                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
+                p, im0, frame = path[i], im0s[i].copy(), dataset.count
+                s += f'{i}: '
             else:
-                p, s, im0, frame = path, '', im0s.copy(), getattr(dataset, 'frame', 0)
+                p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
 
             p = Path(p)  # to Path
-            save_path = str(save_dir / p.name)  # img.jpg
-            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
-            s += '%gx%g ' % img.shape[2:]  # print string
+            save_path = str(save_dir / p.name)  # im.jpg
+            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # im.txt
+            s += '%gx%g ' % im.shape[2:]  # print string
             gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
+            imc = im0.copy() if save_crop else im0  # for save_crop
+            annotator = Annotator(im0, line_width=line_thickness, example=str(names))
             if len(det):
                 # Rescale boxes from img_size to im0 size
-                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
+                det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()
 
                 # Print results
-                for c in det[:, -1].unique():
-                    n = (det[:, -1] == c).sum()  # detections per class
+                for c in det[:, 5].unique():
+                    n = (det[:, 5] == c).sum()  # detections per class
                     s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string
 
                 # Write results
                 for *xyxy, conf, cls in reversed(det):
                     if save_txt:  # Write to file
                         xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
-                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
-                        with open(txt_path + '.txt', 'a') as f:
+                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
+                        with open(f'{txt_path}.txt', 'a') as f:
                             f.write(('%g ' * len(line)).rstrip() % line + '\n')
 
-                    if save_img or opt.save_crop or view_img:  # Add bbox to image
+                    if save_img or save_crop or view_img:  # Add bbox to image
                         c = int(cls)  # integer class
-                        label = None if opt.hide_labels else (names[c] if opt.hide_conf else f'{names[c]} {conf:.2f}')
-
-                        plot_one_box(xyxy, im0, label=label, color=colors[c], line_thickness=opt.line_thickness)
-                        if opt.save_crop:
-                            save_one_box(xyxy, im0s, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)
-
-            # Print time (inference + NMS)
-            print(f'{s}Done. ({t2 - t1:.3f}s)')
+                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
+                        annotator.box_label(xyxy, label, color=colors(c, True))
+                    if save_crop:
+                        save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)
 
             # Stream results
+            im0 = annotator.result()
             if view_img:
+                if platform.system() == 'Linux' and p not in windows:
+                    windows.append(p)
+                    cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)  # allow window resize (Linux)
+                    cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
                 cv2.imshow(str(p), im0)
                 cv2.waitKey(1)  # 1 millisecond
 
@@ -129,58 +189,73 @@ def detect(opt):
                 if dataset.mode == 'image':
                     cv2.imwrite(save_path, im0)
                 else:  # 'video' or 'stream'
-                    if vid_path != save_path:  # new video
-                        vid_path = save_path
-                        if isinstance(vid_writer, cv2.VideoWriter):
-                            vid_writer.release()  # release previous video writer
+                    if vid_path[i] != save_path:  # new video
+                        vid_path[i] = save_path
+                        if isinstance(vid_writer[i], cv2.VideoWriter):
+                            vid_writer[i].release()  # release previous video writer
                         if vid_cap:  # video
                             fps = vid_cap.get(cv2.CAP_PROP_FPS)
                             w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                             h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                         else:  # stream
                             fps, w, h = 30, im0.shape[1], im0.shape[0]
-                            save_path += '.mp4'
-                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
-                    vid_writer.write(im0)
+                        save_path = str(Path(save_path).with_suffix('.mp4'))  # force *.mp4 suffix on results videos
+                        vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
+                    vid_writer[i].write(im0)
 
+        # Print time (inference-only)
+        LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms")
+
+    # Print results
+    t = tuple(x.t / seen * 1E3 for x in dt)  # speeds per image
+    LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
     if save_txt or save_img:
         s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
-        print(f"Results saved to {save_dir}{s}")
-
-    print(f'Done. ({time.time() - t0:.3f}s)')
+        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
+    if update:
+        strip_optimizer(weights[0])  # update model (to fix SourceChangeWarning)
 
 
-if __name__ == '__main__':
+def parse_opt():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')
-    parser.add_argument('--source', type=str, default='data/images', help='source')  # file/folder, 0 for webcam
-    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
-    parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
-    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
+    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path or triton URL')
+    parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob/screen/0(webcam)')
+    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path')
+    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
+    parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
+    parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
+    parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
-    parser.add_argument('--view-img', action='store_true', help='display results')
+    parser.add_argument('--view-img', action='store_true', help='show results')
     parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
     parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
     parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
     parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
-    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
+    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
     parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
     parser.add_argument('--augment', action='store_true', help='augmented inference')
+    parser.add_argument('--visualize', action='store_true', help='visualize features')
     parser.add_argument('--update', action='store_true', help='update all models')
-    parser.add_argument('--project', default='runs/detect', help='save results to project/name')
+    parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name')
     parser.add_argument('--name', default='exp', help='save results to project/name')
     parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
     parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
     parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
     parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
+    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
+    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
+    parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride')
     opt = parser.parse_args()
-    print(opt)
-    check_requirements(exclude=('pycocotools', 'thop'))
-
-    with torch.no_grad():
-        if opt.update:  # update all models (to fix SourceChangeWarning)
-            for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
-                detect(opt=opt)
-                strip_optimizer(opt.weights)
-        else:
-            detect(opt=opt)
+    opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1  # expand
+    print_args(vars(opt))
+    return opt
+
+
+def main(opt):
+    check_requirements(exclude=('tensorboard', 'thop'))
+    run(**vars(opt))
+
+
+if __name__ == '__main__':
+    opt = parse_opt()
+    main(opt)
diff --git a/export.py b/export.py
new file mode 100644
index 000000000000..e167b2088cb1
--- /dev/null
+++ b/export.py
@@ -0,0 +1,672 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Export a YOLOv5 PyTorch model to other formats. TensorFlow exports authored by https://github.com/zldrobit
+
+Format                      | `export.py --include`         | Model
+---                         | ---                           | ---
+PyTorch                     | -                             | yolov5s.pt
+TorchScript                 | `torchscript`                 | yolov5s.torchscript
+ONNX                        | `onnx`                        | yolov5s.onnx
+OpenVINO                    | `openvino`                    | yolov5s_openvino_model/
+TensorRT                    | `engine`                      | yolov5s.engine
+CoreML                      | `coreml`                      | yolov5s.mlmodel
+TensorFlow SavedModel       | `saved_model`                 | yolov5s_saved_model/
+TensorFlow GraphDef         | `pb`                          | yolov5s.pb
+TensorFlow Lite             | `tflite`                      | yolov5s.tflite
+TensorFlow Edge TPU         | `edgetpu`                     | yolov5s_edgetpu.tflite
+TensorFlow.js               | `tfjs`                        | yolov5s_web_model/
+PaddlePaddle                | `paddle`                      | yolov5s_paddle_model/
+
+Requirements:
+    $ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime openvino-dev tensorflow-cpu  # CPU
+    $ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime-gpu openvino-dev tensorflow  # GPU
+
+Usage:
+    $ python export.py --weights yolov5s.pt --include torchscript onnx openvino engine coreml tflite ...
+
+Inference:
+    $ python detect.py --weights yolov5s.pt                 # PyTorch
+                                 yolov5s.torchscript        # TorchScript
+                                 yolov5s.onnx               # ONNX Runtime or OpenCV DNN with --dnn
+                                 yolov5s_openvino_model     # OpenVINO
+                                 yolov5s.engine             # TensorRT
+                                 yolov5s.mlmodel            # CoreML (macOS-only)
+                                 yolov5s_saved_model        # TensorFlow SavedModel
+                                 yolov5s.pb                 # TensorFlow GraphDef
+                                 yolov5s.tflite             # TensorFlow Lite
+                                 yolov5s_edgetpu.tflite     # TensorFlow Edge TPU
+                                 yolov5s_paddle_model       # PaddlePaddle
+
+TensorFlow.js:
+    $ cd .. && git clone https://github.com/zldrobit/tfjs-yolov5-example.git && cd tfjs-yolov5-example
+    $ npm install
+    $ ln -s ../../yolov5/yolov5s_web_model public/yolov5s_web_model
+    $ npm start
+"""
+
+import argparse
+import contextlib
+import json
+import os
+import platform
+import re
+import subprocess
+import sys
+import time
+import warnings
+from pathlib import Path
+
+import pandas as pd
+import torch
+from torch.utils.mobile_optimizer import optimize_for_mobile
+
+FILE = Path(__file__).resolve()
+ROOT = FILE.parents[0]  # YOLOv5 root directory
+if str(ROOT) not in sys.path:
+    sys.path.append(str(ROOT))  # add ROOT to PATH
+if platform.system() != 'Windows':
+    ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
+
+from models.experimental import attempt_load
+from models.yolo import ClassificationModel, Detect, DetectionModel, SegmentationModel
+from utils.dataloaders import LoadImages
+from utils.general import (LOGGER, Profile, check_dataset, check_img_size, check_requirements, check_version,
+                           check_yaml, colorstr, file_size, get_default_args, print_args, url2file, yaml_save)
+from utils.torch_utils import select_device, smart_inference_mode
+
+MACOS = platform.system() == 'Darwin'  # macOS environment
+
+
+def export_formats():
+    # YOLOv5 export formats
+    x = [
+        ['PyTorch', '-', '.pt', True, True],
+        ['TorchScript', 'torchscript', '.torchscript', True, True],
+        ['ONNX', 'onnx', '.onnx', True, True],
+        ['OpenVINO', 'openvino', '_openvino_model', True, False],
+        ['TensorRT', 'engine', '.engine', False, True],
+        ['CoreML', 'coreml', '.mlmodel', True, False],
+        ['TensorFlow SavedModel', 'saved_model', '_saved_model', True, True],
+        ['TensorFlow GraphDef', 'pb', '.pb', True, True],
+        ['TensorFlow Lite', 'tflite', '.tflite', True, False],
+        ['TensorFlow Edge TPU', 'edgetpu', '_edgetpu.tflite', False, False],
+        ['TensorFlow.js', 'tfjs', '_web_model', False, False],
+        ['PaddlePaddle', 'paddle', '_paddle_model', True, True],]
+    return pd.DataFrame(x, columns=['Format', 'Argument', 'Suffix', 'CPU', 'GPU'])
+
+
+def try_export(inner_func):
+    # YOLOv5 export decorator, i..e @try_export
+    inner_args = get_default_args(inner_func)
+
+    def outer_func(*args, **kwargs):
+        prefix = inner_args['prefix']
+        try:
+            with Profile() as dt:
+                f, model = inner_func(*args, **kwargs)
+            LOGGER.info(f'{prefix} export success ✅ {dt.t:.1f}s, saved as {f} ({file_size(f):.1f} MB)')
+            return f, model
+        except Exception as e:
+            LOGGER.info(f'{prefix} export failure ❌ {dt.t:.1f}s: {e}')
+            return None, None
+
+    return outer_func
+
+
+@try_export
+def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:')):
+    # YOLOv5 TorchScript model export
+    LOGGER.info(f'\n{prefix} starting export with torch {torch.__version__}...')
+    f = file.with_suffix('.torchscript')
+
+    ts = torch.jit.trace(model, im, strict=False)
+    d = {'shape': im.shape, 'stride': int(max(model.stride)), 'names': model.names}
+    extra_files = {'config.txt': json.dumps(d)}  # torch._C.ExtraFilesMap()
+    if optimize:  # https://pytorch.org/tutorials/recipes/mobile_interpreter.html
+        optimize_for_mobile(ts)._save_for_lite_interpreter(str(f), _extra_files=extra_files)
+    else:
+        ts.save(str(f), _extra_files=extra_files)
+    return f, None
+
+
+@try_export
+def export_onnx(model, im, file, opset, dynamic, simplify, prefix=colorstr('ONNX:')):
+    # YOLOv5 ONNX export
+    check_requirements('onnx>=1.12.0')
+    import onnx
+
+    LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__}...')
+    f = file.with_suffix('.onnx')
+
+    output_names = ['output0', 'output1'] if isinstance(model, SegmentationModel) else ['output0']
+    if dynamic:
+        dynamic = {'images': {0: 'batch', 2: 'height', 3: 'width'}}  # shape(1,3,640,640)
+        if isinstance(model, SegmentationModel):
+            dynamic['output0'] = {0: 'batch', 1: 'anchors'}  # shape(1,25200,85)
+            dynamic['output1'] = {0: 'batch', 2: 'mask_height', 3: 'mask_width'}  # shape(1,32,160,160)
+        elif isinstance(model, DetectionModel):
+            dynamic['output0'] = {0: 'batch', 1: 'anchors'}  # shape(1,25200,85)
+
+    torch.onnx.export(
+        model.cpu() if dynamic else model,  # --dynamic only compatible with cpu
+        im.cpu() if dynamic else im,
+        f,
+        verbose=False,
+        opset_version=opset,
+        do_constant_folding=True,  # WARNING: DNN inference with torch>=1.12 may require do_constant_folding=False
+        input_names=['images'],
+        output_names=output_names,
+        dynamic_axes=dynamic or None)
+
+    # Checks
+    model_onnx = onnx.load(f)  # load onnx model
+    onnx.checker.check_model(model_onnx)  # check onnx model
+
+    # Metadata
+    d = {'stride': int(max(model.stride)), 'names': model.names}
+    for k, v in d.items():
+        meta = model_onnx.metadata_props.add()
+        meta.key, meta.value = k, str(v)
+    onnx.save(model_onnx, f)
+
+    # Simplify
+    if simplify:
+        try:
+            cuda = torch.cuda.is_available()
+            check_requirements(('onnxruntime-gpu' if cuda else 'onnxruntime', 'onnx-simplifier>=0.4.1'))
+            import onnxsim
+
+            LOGGER.info(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
+            model_onnx, check = onnxsim.simplify(model_onnx)
+            assert check, 'assert check failed'
+            onnx.save(model_onnx, f)
+        except Exception as e:
+            LOGGER.info(f'{prefix} simplifier failure: {e}')
+    return f, model_onnx
+
+
+@try_export
+def export_openvino(file, metadata, half, prefix=colorstr('OpenVINO:')):
+    # YOLOv5 OpenVINO export
+    check_requirements('openvino-dev')  # requires openvino-dev: https://pypi.org/project/openvino-dev/
+    import openvino.inference_engine as ie
+
+    LOGGER.info(f'\n{prefix} starting export with openvino {ie.__version__}...')
+    f = str(file).replace('.pt', f'_openvino_model{os.sep}')
+
+    args = [
+        'mo',
+        '--input_model',
+        str(file.with_suffix('.onnx')),
+        '--output_dir',
+        f,
+        '--data_type',
+        ('FP16' if half else 'FP32'),]
+    subprocess.run(args, check=True, env=os.environ)  # export
+    yaml_save(Path(f) / file.with_suffix('.yaml').name, metadata)  # add metadata.yaml
+    return f, None
+
+
+@try_export
+def export_paddle(model, im, file, metadata, prefix=colorstr('PaddlePaddle:')):
+    # YOLOv5 Paddle export
+    check_requirements(('paddlepaddle', 'x2paddle'))
+    import x2paddle
+    from x2paddle.convert import pytorch2paddle
+
+    LOGGER.info(f'\n{prefix} starting export with X2Paddle {x2paddle.__version__}...')
+    f = str(file).replace('.pt', f'_paddle_model{os.sep}')
+
+    pytorch2paddle(module=model, save_dir=f, jit_type='trace', input_examples=[im])  # export
+    yaml_save(Path(f) / file.with_suffix('.yaml').name, metadata)  # add metadata.yaml
+    return f, None
+
+
+@try_export
+def export_coreml(model, im, file, int8, half, prefix=colorstr('CoreML:')):
+    # YOLOv5 CoreML export
+    check_requirements('coremltools')
+    import coremltools as ct
+
+    LOGGER.info(f'\n{prefix} starting export with coremltools {ct.__version__}...')
+    f = file.with_suffix('.mlmodel')
+
+    ts = torch.jit.trace(model, im, strict=False)  # TorchScript model
+    ct_model = ct.convert(ts, inputs=[ct.ImageType('image', shape=im.shape, scale=1 / 255, bias=[0, 0, 0])])
+    bits, mode = (8, 'kmeans_lut') if int8 else (16, 'linear') if half else (32, None)
+    if bits < 32:
+        if MACOS:  # quantization only supported on macOS
+            with warnings.catch_warnings():
+                warnings.filterwarnings('ignore', category=DeprecationWarning)  # suppress numpy==1.20 float warning
+                ct_model = ct.models.neural_network.quantization_utils.quantize_weights(ct_model, bits, mode)
+        else:
+            print(f'{prefix} quantization only supported on macOS, skipping...')
+    ct_model.save(f)
+    return f, ct_model
+
+
+@try_export
+def export_engine(model, im, file, half, dynamic, simplify, workspace=4, verbose=False, prefix=colorstr('TensorRT:')):
+    # YOLOv5 TensorRT export https://developer.nvidia.com/tensorrt
+    assert im.device.type != 'cpu', 'export running on CPU but must be on GPU, i.e. `python export.py --device 0`'
+    try:
+        import tensorrt as trt
+    except Exception:
+        if platform.system() == 'Linux':
+            check_requirements('nvidia-tensorrt', cmds='-U --index-url https://pypi.ngc.nvidia.com')
+        import tensorrt as trt
+
+    if trt.__version__[0] == '7':  # TensorRT 7 handling https://github.com/ultralytics/yolov5/issues/6012
+        grid = model.model[-1].anchor_grid
+        model.model[-1].anchor_grid = [a[..., :1, :1, :] for a in grid]
+        export_onnx(model, im, file, 12, dynamic, simplify)  # opset 12
+        model.model[-1].anchor_grid = grid
+    else:  # TensorRT >= 8
+        check_version(trt.__version__, '8.0.0', hard=True)  # require tensorrt>=8.0.0
+        export_onnx(model, im, file, 12, dynamic, simplify)  # opset 12
+    onnx = file.with_suffix('.onnx')
+
+    LOGGER.info(f'\n{prefix} starting export with TensorRT {trt.__version__}...')
+    assert onnx.exists(), f'failed to export ONNX file: {onnx}'
+    f = file.with_suffix('.engine')  # TensorRT engine file
+    logger = trt.Logger(trt.Logger.INFO)
+    if verbose:
+        logger.min_severity = trt.Logger.Severity.VERBOSE
+
+    builder = trt.Builder(logger)
+    config = builder.create_builder_config()
+    config.max_workspace_size = workspace * 1 << 30
+    # config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace << 30)  # fix TRT 8.4 deprecation notice
+
+    flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
+    network = builder.create_network(flag)
+    parser = trt.OnnxParser(network, logger)
+    if not parser.parse_from_file(str(onnx)):
+        raise RuntimeError(f'failed to load ONNX file: {onnx}')
+
+    inputs = [network.get_input(i) for i in range(network.num_inputs)]
+    outputs = [network.get_output(i) for i in range(network.num_outputs)]
+    for inp in inputs:
+        LOGGER.info(f'{prefix} input "{inp.name}" with shape{inp.shape} {inp.dtype}')
+    for out in outputs:
+        LOGGER.info(f'{prefix} output "{out.name}" with shape{out.shape} {out.dtype}')
+
+    if dynamic:
+        if im.shape[0] <= 1:
+            LOGGER.warning(f'{prefix} WARNING ⚠️ --dynamic model requires maximum --batch-size argument')
+        profile = builder.create_optimization_profile()
+        for inp in inputs:
+            profile.set_shape(inp.name, (1, *im.shape[1:]), (max(1, im.shape[0] // 2), *im.shape[1:]), im.shape)
+        config.add_optimization_profile(profile)
+
+    LOGGER.info(f'{prefix} building FP{16 if builder.platform_has_fast_fp16 and half else 32} engine as {f}')
+    if builder.platform_has_fast_fp16 and half:
+        config.set_flag(trt.BuilderFlag.FP16)
+    with builder.build_engine(network, config) as engine, open(f, 'wb') as t:
+        t.write(engine.serialize())
+    return f, None
+
+
+@try_export
+def export_saved_model(model,
+                       im,
+                       file,
+                       dynamic,
+                       tf_nms=False,
+                       agnostic_nms=False,
+                       topk_per_class=100,
+                       topk_all=100,
+                       iou_thres=0.45,
+                       conf_thres=0.25,
+                       keras=False,
+                       prefix=colorstr('TensorFlow SavedModel:')):
+    # YOLOv5 TensorFlow SavedModel export
+    try:
+        import tensorflow as tf
+    except Exception:
+        check_requirements(f"tensorflow{'' if torch.cuda.is_available() else '-macos' if MACOS else '-cpu'}")
+        import tensorflow as tf
+    from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2
+
+    from models.tf import TFModel
+
+    LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
+    f = str(file).replace('.pt', '_saved_model')
+    batch_size, ch, *imgsz = list(im.shape)  # BCHW
+
+    tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
+    im = tf.zeros((batch_size, *imgsz, ch))  # BHWC order for TensorFlow
+    _ = tf_model.predict(im, tf_nms, agnostic_nms, topk_per_class, topk_all, iou_thres, conf_thres)
+    inputs = tf.keras.Input(shape=(*imgsz, ch), batch_size=None if dynamic else batch_size)
+    outputs = tf_model.predict(inputs, tf_nms, agnostic_nms, topk_per_class, topk_all, iou_thres, conf_thres)
+    keras_model = tf.keras.Model(inputs=inputs, outputs=outputs)
+    keras_model.trainable = False
+    keras_model.summary()
+    if keras:
+        keras_model.save(f, save_format='tf')
+    else:
+        spec = tf.TensorSpec(keras_model.inputs[0].shape, keras_model.inputs[0].dtype)
+        m = tf.function(lambda x: keras_model(x))  # full model
+        m = m.get_concrete_function(spec)
+        frozen_func = convert_variables_to_constants_v2(m)
+        tfm = tf.Module()
+        tfm.__call__ = tf.function(lambda x: frozen_func(x)[:4] if tf_nms else frozen_func(x), [spec])
+        tfm.__call__(im)
+        tf.saved_model.save(tfm,
+                            f,
+                            options=tf.saved_model.SaveOptions(experimental_custom_gradients=False) if check_version(
+                                tf.__version__, '2.6') else tf.saved_model.SaveOptions())
+    return f, keras_model
+
+
+@try_export
+def export_pb(keras_model, file, prefix=colorstr('TensorFlow GraphDef:')):
+    # YOLOv5 TensorFlow GraphDef *.pb export https://github.com/leimao/Frozen_Graph_TensorFlow
+    import tensorflow as tf
+    from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2
+
+    LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
+    f = file.with_suffix('.pb')
+
+    m = tf.function(lambda x: keras_model(x))  # full model
+    m = m.get_concrete_function(tf.TensorSpec(keras_model.inputs[0].shape, keras_model.inputs[0].dtype))
+    frozen_func = convert_variables_to_constants_v2(m)
+    frozen_func.graph.as_graph_def()
+    tf.io.write_graph(graph_or_graph_def=frozen_func.graph, logdir=str(f.parent), name=f.name, as_text=False)
+    return f, None
+
+
+@try_export
+def export_tflite(keras_model, im, file, int8, data, nms, agnostic_nms, prefix=colorstr('TensorFlow Lite:')):
+    # YOLOv5 TensorFlow Lite export
+    import tensorflow as tf
+
+    LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
+    batch_size, ch, *imgsz = list(im.shape)  # BCHW
+    f = str(file).replace('.pt', '-fp16.tflite')
+
+    converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
+    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
+    converter.target_spec.supported_types = [tf.float16]
+    converter.optimizations = [tf.lite.Optimize.DEFAULT]
+    if int8:
+        from models.tf import representative_dataset_gen
+        dataset = LoadImages(check_dataset(check_yaml(data))['train'], img_size=imgsz, auto=False)
+        converter.representative_dataset = lambda: representative_dataset_gen(dataset, ncalib=100)
+        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
+        converter.target_spec.supported_types = []
+        converter.inference_input_type = tf.uint8  # or tf.int8
+        converter.inference_output_type = tf.uint8  # or tf.int8
+        converter.experimental_new_quantizer = True
+        f = str(file).replace('.pt', '-int8.tflite')
+    if nms or agnostic_nms:
+        converter.target_spec.supported_ops.append(tf.lite.OpsSet.SELECT_TF_OPS)
+
+    tflite_model = converter.convert()
+    open(f, 'wb').write(tflite_model)
+    return f, None
+
+
+@try_export
+def export_edgetpu(file, prefix=colorstr('Edge TPU:')):
+    # YOLOv5 Edge TPU export https://coral.ai/docs/edgetpu/models-intro/
+    cmd = 'edgetpu_compiler --version'
+    help_url = 'https://coral.ai/docs/edgetpu/compiler/'
+    assert platform.system() == 'Linux', f'export only supported on Linux. See {help_url}'
+    if subprocess.run(f'{cmd} > /dev/null 2>&1', shell=True).returncode != 0:
+        LOGGER.info(f'\n{prefix} export requires Edge TPU compiler. Attempting install from {help_url}')
+        sudo = subprocess.run('sudo --version >/dev/null', shell=True).returncode == 0  # sudo installed on system
+        for c in (
+                'curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -',
+                'echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | sudo tee /etc/apt/sources.list.d/coral-edgetpu.list',
+                'sudo apt-get update', 'sudo apt-get install edgetpu-compiler'):
+            subprocess.run(c if sudo else c.replace('sudo ', ''), shell=True, check=True)
+    ver = subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().split()[-1]
+
+    LOGGER.info(f'\n{prefix} starting export with Edge TPU compiler {ver}...')
+    f = str(file).replace('.pt', '-int8_edgetpu.tflite')  # Edge TPU model
+    f_tfl = str(file).replace('.pt', '-int8.tflite')  # TFLite model
+
+    subprocess.run([
+        'edgetpu_compiler',
+        '-s',
+        '-d',
+        '-k',
+        '10',
+        '--out_dir',
+        str(file.parent),
+        f_tfl,], check=True)
+    return f, None
+
+
+@try_export
+def export_tfjs(file, int8, prefix=colorstr('TensorFlow.js:')):
+    # YOLOv5 TensorFlow.js export
+    check_requirements('tensorflowjs')
+    import tensorflowjs as tfjs
+
+    LOGGER.info(f'\n{prefix} starting export with tensorflowjs {tfjs.__version__}...')
+    f = str(file).replace('.pt', '_web_model')  # js dir
+    f_pb = file.with_suffix('.pb')  # *.pb path
+    f_json = f'{f}/model.json'  # *.json path
+
+    args = [
+        'tensorflowjs_converter',
+        '--input_format=tf_frozen_model',
+        '--quantize_uint8' if int8 else '',
+        '--output_node_names=Identity,Identity_1,Identity_2,Identity_3',
+        str(f_pb),
+        str(f),]
+    subprocess.run([arg for arg in args if arg], check=True)
+
+    json = Path(f_json).read_text()
+    with open(f_json, 'w') as j:  # sort JSON Identity_* in ascending order
+        subst = re.sub(
+            r'{"outputs": {"Identity.?.?": {"name": "Identity.?.?"}, '
+            r'"Identity.?.?": {"name": "Identity.?.?"}, '
+            r'"Identity.?.?": {"name": "Identity.?.?"}, '
+            r'"Identity.?.?": {"name": "Identity.?.?"}}}', r'{"outputs": {"Identity": {"name": "Identity"}, '
+            r'"Identity_1": {"name": "Identity_1"}, '
+            r'"Identity_2": {"name": "Identity_2"}, '
+            r'"Identity_3": {"name": "Identity_3"}}}', json)
+        j.write(subst)
+    return f, None
+
+
+def add_tflite_metadata(file, metadata, num_outputs):
+    # Add metadata to *.tflite models per https://www.tensorflow.org/lite/models/convert/metadata
+    with contextlib.suppress(ImportError):
+        # check_requirements('tflite_support')
+        from tflite_support import flatbuffers
+        from tflite_support import metadata as _metadata
+        from tflite_support import metadata_schema_py_generated as _metadata_fb
+
+        tmp_file = Path('/tmp/meta.txt')
+        with open(tmp_file, 'w') as meta_f:
+            meta_f.write(str(metadata))
+
+        model_meta = _metadata_fb.ModelMetadataT()
+        label_file = _metadata_fb.AssociatedFileT()
+        label_file.name = tmp_file.name
+        model_meta.associatedFiles = [label_file]
+
+        subgraph = _metadata_fb.SubGraphMetadataT()
+        subgraph.inputTensorMetadata = [_metadata_fb.TensorMetadataT()]
+        subgraph.outputTensorMetadata = [_metadata_fb.TensorMetadataT()] * num_outputs
+        model_meta.subgraphMetadata = [subgraph]
+
+        b = flatbuffers.Builder(0)
+        b.Finish(model_meta.Pack(b), _metadata.MetadataPopulator.METADATA_FILE_IDENTIFIER)
+        metadata_buf = b.Output()
+
+        populator = _metadata.MetadataPopulator.with_model_file(file)
+        populator.load_metadata_buffer(metadata_buf)
+        populator.load_associated_files([str(tmp_file)])
+        populator.populate()
+        tmp_file.unlink()
+
+
+@smart_inference_mode()
+def run(
+        data=ROOT / 'data/coco128.yaml',  # 'dataset.yaml path'
+        weights=ROOT / 'yolov5s.pt',  # weights path
+        imgsz=(640, 640),  # image (height, width)
+        batch_size=1,  # batch size
+        device='cpu',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
+        include=('torchscript', 'onnx'),  # include formats
+        half=False,  # FP16 half-precision export
+        inplace=False,  # set YOLOv5 Detect() inplace=True
+        keras=False,  # use Keras
+        optimize=False,  # TorchScript: optimize for mobile
+        int8=False,  # CoreML/TF INT8 quantization
+        dynamic=False,  # ONNX/TF/TensorRT: dynamic axes
+        simplify=False,  # ONNX: simplify model
+        opset=12,  # ONNX: opset version
+        verbose=False,  # TensorRT: verbose log
+        workspace=4,  # TensorRT: workspace size (GB)
+        nms=False,  # TF: add NMS to model
+        agnostic_nms=False,  # TF: add agnostic NMS to model
+        topk_per_class=100,  # TF.js NMS: topk per class to keep
+        topk_all=100,  # TF.js NMS: topk for all classes to keep
+        iou_thres=0.45,  # TF.js NMS: IoU threshold
+        conf_thres=0.25,  # TF.js NMS: confidence threshold
+):
+    t = time.time()
+    include = [x.lower() for x in include]  # to lowercase
+    fmts = tuple(export_formats()['Argument'][1:])  # --include arguments
+    flags = [x in include for x in fmts]
+    assert sum(flags) == len(include), f'ERROR: Invalid --include {include}, valid --include arguments are {fmts}'
+    jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle = flags  # export booleans
+    file = Path(url2file(weights) if str(weights).startswith(('http:/', 'https:/')) else weights)  # PyTorch weights
+
+    # Load PyTorch model
+    device = select_device(device)
+    if half:
+        assert device.type != 'cpu' or coreml, '--half only compatible with GPU export, i.e. use --device 0'
+        assert not dynamic, '--half not compatible with --dynamic, i.e. use either --half or --dynamic but not both'
+    model = attempt_load(weights, device=device, inplace=True, fuse=True)  # load FP32 model
+
+    # Checks
+    imgsz *= 2 if len(imgsz) == 1 else 1  # expand
+    if optimize:
+        assert device.type == 'cpu', '--optimize not compatible with cuda devices, i.e. use --device cpu'
+
+    # Input
+    gs = int(max(model.stride))  # grid size (max stride)
+    imgsz = [check_img_size(x, gs) for x in imgsz]  # verify img_size are gs-multiples
+    im = torch.zeros(batch_size, 3, *imgsz).to(device)  # image size(1,3,320,192) BCHW iDetection
+
+    # Update model
+    model.eval()
+    for k, m in model.named_modules():
+        if isinstance(m, Detect):
+            m.inplace = inplace
+            m.dynamic = dynamic
+            m.export = True
+
+    for _ in range(2):
+        y = model(im)  # dry runs
+    if half and not coreml:
+        im, model = im.half(), model.half()  # to FP16
+    shape = tuple((y[0] if isinstance(y, tuple) else y).shape)  # model output shape
+    metadata = {'stride': int(max(model.stride)), 'names': model.names}  # model metadata
+    LOGGER.info(f"\n{colorstr('PyTorch:')} starting from {file} with output shape {shape} ({file_size(file):.1f} MB)")
+
+    # Exports
+    f = [''] * len(fmts)  # exported filenames
+    warnings.filterwarnings(action='ignore', category=torch.jit.TracerWarning)  # suppress TracerWarning
+    if jit:  # TorchScript
+        f[0], _ = export_torchscript(model, im, file, optimize)
+    if engine:  # TensorRT required before ONNX
+        f[1], _ = export_engine(model, im, file, half, dynamic, simplify, workspace, verbose)
+    if onnx or xml:  # OpenVINO requires ONNX
+        f[2], _ = export_onnx(model, im, file, opset, dynamic, simplify)
+    if xml:  # OpenVINO
+        f[3], _ = export_openvino(file, metadata, half)
+    if coreml:  # CoreML
+        f[4], _ = export_coreml(model, im, file, int8, half)
+    if any((saved_model, pb, tflite, edgetpu, tfjs)):  # TensorFlow formats
+        assert not tflite or not tfjs, 'TFLite and TF.js models must be exported separately, please pass only one type.'
+        assert not isinstance(model, ClassificationModel), 'ClassificationModel export to TF formats not yet supported.'
+        f[5], s_model = export_saved_model(model.cpu(),
+                                           im,
+                                           file,
+                                           dynamic,
+                                           tf_nms=nms or agnostic_nms or tfjs,
+                                           agnostic_nms=agnostic_nms or tfjs,
+                                           topk_per_class=topk_per_class,
+                                           topk_all=topk_all,
+                                           iou_thres=iou_thres,
+                                           conf_thres=conf_thres,
+                                           keras=keras)
+        if pb or tfjs:  # pb prerequisite to tfjs
+            f[6], _ = export_pb(s_model, file)
+        if tflite or edgetpu:
+            f[7], _ = export_tflite(s_model, im, file, int8 or edgetpu, data=data, nms=nms, agnostic_nms=agnostic_nms)
+            if edgetpu:
+                f[8], _ = export_edgetpu(file)
+            add_tflite_metadata(f[8] or f[7], metadata, num_outputs=len(s_model.outputs))
+        if tfjs:
+            f[9], _ = export_tfjs(file, int8)
+    if paddle:  # PaddlePaddle
+        f[10], _ = export_paddle(model, im, file, metadata)
+
+    # Finish
+    f = [str(x) for x in f if x]  # filter out '' and None
+    if any(f):
+        cls, det, seg = (isinstance(model, x) for x in (ClassificationModel, DetectionModel, SegmentationModel))  # type
+        det &= not seg  # segmentation models inherit from SegmentationModel(DetectionModel)
+        dir = Path('segment' if seg else 'classify' if cls else '')
+        h = '--half' if half else ''  # --half FP16 inference arg
+        s = '# WARNING ⚠️ ClassificationModel not yet supported for PyTorch Hub AutoShape inference' if cls else \
+            '# WARNING ⚠️ SegmentationModel not yet supported for PyTorch Hub AutoShape inference' if seg else ''
+        LOGGER.info(f'\nExport complete ({time.time() - t:.1f}s)'
+                    f"\nResults saved to {colorstr('bold', file.parent.resolve())}"
+                    f"\nDetect:          python {dir / ('detect.py' if det else 'predict.py')} --weights {f[-1]} {h}"
+                    f"\nValidate:        python {dir / 'val.py'} --weights {f[-1]} {h}"
+                    f"\nPyTorch Hub:     model = torch.hub.load('ultralytics/yolov5', 'custom', '{f[-1]}')  {s}"
+                    f'\nVisualize:       https://netron.app')
+    return f  # return list of exported files/dirs
+
+
+def parse_opt(known=False):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
+    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model.pt path(s)')
+    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640, 640], help='image (h, w)')
+    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
+    parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
+    parser.add_argument('--half', action='store_true', help='FP16 half-precision export')
+    parser.add_argument('--inplace', action='store_true', help='set YOLOv5 Detect() inplace=True')
+    parser.add_argument('--keras', action='store_true', help='TF: use Keras')
+    parser.add_argument('--optimize', action='store_true', help='TorchScript: optimize for mobile')
+    parser.add_argument('--int8', action='store_true', help='CoreML/TF INT8 quantization')
+    parser.add_argument('--dynamic', action='store_true', help='ONNX/TF/TensorRT: dynamic axes')
+    parser.add_argument('--simplify', action='store_true', help='ONNX: simplify model')
+    parser.add_argument('--opset', type=int, default=17, help='ONNX: opset version')
+    parser.add_argument('--verbose', action='store_true', help='TensorRT: verbose log')
+    parser.add_argument('--workspace', type=int, default=4, help='TensorRT: workspace size (GB)')
+    parser.add_argument('--nms', action='store_true', help='TF: add NMS to model')
+    parser.add_argument('--agnostic-nms', action='store_true', help='TF: add agnostic NMS to model')
+    parser.add_argument('--topk-per-class', type=int, default=100, help='TF.js NMS: topk per class to keep')
+    parser.add_argument('--topk-all', type=int, default=100, help='TF.js NMS: topk for all classes to keep')
+    parser.add_argument('--iou-thres', type=float, default=0.45, help='TF.js NMS: IoU threshold')
+    parser.add_argument('--conf-thres', type=float, default=0.25, help='TF.js NMS: confidence threshold')
+    parser.add_argument(
+        '--include',
+        nargs='+',
+        default=['torchscript'],
+        help='torchscript, onnx, openvino, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle')
+    opt = parser.parse_known_args()[0] if known else parser.parse_args()
+    print_args(vars(opt))
+    return opt
+
+
+def main(opt):
+    for opt.weights in (opt.weights if isinstance(opt.weights, list) else [opt.weights]):
+        run(**vars(opt))
+
+
+if __name__ == '__main__':
+    opt = parse_opt()
+    main(opt)
diff --git a/hubconf.py b/hubconf.py
index e42d0b59bd2a..41af8e39d14d 100644
--- a/hubconf.py
+++ b/hubconf.py
@@ -1,142 +1,169 @@
-"""YOLOv5 PyTorch Hub models https://pytorch.org/hub/ultralytics_yolov5/
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+PyTorch Hub models https://pytorch.org/hub/ultralytics_yolov5
 
 Usage:
     import torch
-    model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
+    model = torch.hub.load('ultralytics/yolov5', 'yolov5s')  # official model
+    model = torch.hub.load('ultralytics/yolov5:master', 'yolov5s')  # from branch
+    model = torch.hub.load('ultralytics/yolov5', 'custom', 'yolov5s.pt')  # custom/local model
+    model = torch.hub.load('.', 'custom', 'yolov5s.pt', source='local')  # local repo
 """
 
-from pathlib import Path
-
 import torch
 
-from models.yolo import Model
-from utils.general import check_requirements, set_logging
-from utils.google_utils import attempt_download
-from utils.torch_utils import select_device
-
-dependencies = ['torch', 'yaml']
-check_requirements(Path(__file__).parent / 'requirements.txt', exclude=('pycocotools', 'thop'))
-
 
-def create(name, pretrained, channels, classes, autoshape, verbose):
-    """Creates a specified YOLOv5 model
+def _create(name, pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
+    """Creates or loads a YOLOv5 model
 
     Arguments:
-        name (str): name of model, i.e. 'yolov5s'
+        name (str): model name 'yolov5s' or path 'path/to/best.pt'
         pretrained (bool): load pretrained weights into the model
         channels (int): number of input channels
         classes (int): number of model classes
+        autoshape (bool): apply YOLOv5 .autoshape() wrapper to model
+        verbose (bool): print all information to screen
+        device (str, torch.device, None): device to use for model parameters
 
     Returns:
-        pytorch model
+        YOLOv5 model
     """
+    from pathlib import Path
+
+    from models.common import AutoShape, DetectMultiBackend
+    from models.experimental import attempt_load
+    from models.yolo import ClassificationModel, DetectionModel, SegmentationModel
+    from utils.downloads import attempt_download
+    from utils.general import LOGGER, check_requirements, intersect_dicts, logging
+    from utils.torch_utils import select_device
+
+    if not verbose:
+        LOGGER.setLevel(logging.WARNING)
+    check_requirements(exclude=('opencv-python', 'tensorboard', 'thop'))
+    name = Path(name)
+    path = name.with_suffix('.pt') if name.suffix == '' and not name.is_dir() else name  # checkpoint path
     try:
-        set_logging(verbose=verbose)
-
-        cfg = list((Path(__file__).parent / 'models').rglob(f'{name}.yaml'))[0]  # model.yaml path
-        model = Model(cfg, channels, classes)
-        if pretrained:
-            fname = f'{name}.pt'  # checkpoint filename
-            attempt_download(fname)  # download if not found locally
-            ckpt = torch.load(fname, map_location=torch.device('cpu'))  # load
-            msd = model.state_dict()  # model state_dict
-            csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
-            csd = {k: v for k, v in csd.items() if msd[k].shape == v.shape}  # filter
-            model.load_state_dict(csd, strict=False)  # load
-            if len(ckpt['model'].names) == classes:
-                model.names = ckpt['model'].names  # set class names attribute
-            if autoshape:
-                model = model.autoshape()  # for file/URI/PIL/cv2/np inputs and NMS
-        device = select_device('0' if torch.cuda.is_available() else 'cpu')  # default to GPU if available
+        device = select_device(device)
+        if pretrained and channels == 3 and classes == 80:
+            try:
+                model = DetectMultiBackend(path, device=device, fuse=autoshape)  # detection model
+                if autoshape:
+                    if model.pt and isinstance(model.model, ClassificationModel):
+                        LOGGER.warning('WARNING ⚠️ YOLOv5 ClassificationModel is not yet AutoShape compatible. '
+                                       'You must pass torch tensors in BCHW to this model, i.e. shape(1,3,224,224).')
+                    elif model.pt and isinstance(model.model, SegmentationModel):
+                        LOGGER.warning('WARNING ⚠️ YOLOv5 SegmentationModel is not yet AutoShape compatible. '
+                                       'You will not be able to run inference with this model.')
+                    else:
+                        model = AutoShape(model)  # for file/URI/PIL/cv2/np inputs and NMS
+            except Exception:
+                model = attempt_load(path, device=device, fuse=False)  # arbitrary model
+        else:
+            cfg = list((Path(__file__).parent / 'models').rglob(f'{path.stem}.yaml'))[0]  # model.yaml path
+            model = DetectionModel(cfg, channels, classes)  # create model
+            if pretrained:
+                ckpt = torch.load(attempt_download(path), map_location=device)  # load
+                csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
+                csd = intersect_dicts(csd, model.state_dict(), exclude=['anchors'])  # intersect
+                model.load_state_dict(csd, strict=False)  # load
+                if len(ckpt['model'].names) == classes:
+                    model.names = ckpt['model'].names  # set class names attribute
+        if not verbose:
+            LOGGER.setLevel(logging.INFO)  # reset to default
         return model.to(device)
 
     except Exception as e:
         help_url = 'https://github.com/ultralytics/yolov5/issues/36'
-        s = 'Cache maybe be out of date, try force_reload=True. See %s for help.' % help_url
+        s = f'{e}. Cache may be out of date, try `force_reload=True` or see {help_url} for help.'
         raise Exception(s) from e
 
 
-def custom(path_or_model='path/to/model.pt', autoshape=True, verbose=True):
-    """YOLOv5-custom model https://github.com/ultralytics/yolov5
-
-    Arguments (3 options):
-        path_or_model (str): 'path/to/model.pt'
-        path_or_model (dict): torch.load('path/to/model.pt')
-        path_or_model (nn.Module): torch.load('path/to/model.pt')['model']
-
-    Returns:
-        pytorch model
-    """
-    set_logging(verbose=verbose)
+def custom(path='path/to/model.pt', autoshape=True, _verbose=True, device=None):
+    # YOLOv5 custom or local model
+    return _create(path, autoshape=autoshape, verbose=_verbose, device=device)
 
-    model = torch.load(path_or_model) if isinstance(path_or_model, str) else path_or_model  # load checkpoint
-    if isinstance(model, dict):
-        model = model['ema' if model.get('ema') else 'model']  # load model
 
-    hub_model = Model(model.yaml).to(next(model.parameters()).device)  # create
-    hub_model.load_state_dict(model.float().state_dict())  # load state_dict
-    hub_model.names = model.names  # class names
-    if autoshape:
-        hub_model = hub_model.autoshape()  # for file/URI/PIL/cv2/np inputs and NMS
-    device = select_device('0' if torch.cuda.is_available() else 'cpu')  # default to GPU if available
-    return hub_model.to(device)
+def yolov5n(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
+    # YOLOv5-nano model https://github.com/ultralytics/yolov5
+    return _create('yolov5n', pretrained, channels, classes, autoshape, _verbose, device)
 
 
-def yolov5s(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True):
+def yolov5s(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
     # YOLOv5-small model https://github.com/ultralytics/yolov5
-    return create('yolov5s', pretrained, channels, classes, autoshape, verbose)
+    return _create('yolov5s', pretrained, channels, classes, autoshape, _verbose, device)
 
 
-def yolov5m(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True):
+def yolov5m(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
     # YOLOv5-medium model https://github.com/ultralytics/yolov5
-    return create('yolov5m', pretrained, channels, classes, autoshape, verbose)
+    return _create('yolov5m', pretrained, channels, classes, autoshape, _verbose, device)
 
 
-def yolov5l(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True):
+def yolov5l(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
     # YOLOv5-large model https://github.com/ultralytics/yolov5
-    return create('yolov5l', pretrained, channels, classes, autoshape, verbose)
+    return _create('yolov5l', pretrained, channels, classes, autoshape, _verbose, device)
 
 
-def yolov5x(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True):
+def yolov5x(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
     # YOLOv5-xlarge model https://github.com/ultralytics/yolov5
-    return create('yolov5x', pretrained, channels, classes, autoshape, verbose)
+    return _create('yolov5x', pretrained, channels, classes, autoshape, _verbose, device)
+
 
+def yolov5n6(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
+    # YOLOv5-nano-P6 model https://github.com/ultralytics/yolov5
+    return _create('yolov5n6', pretrained, channels, classes, autoshape, _verbose, device)
 
-def yolov5s6(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True):
+
+def yolov5s6(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
     # YOLOv5-small-P6 model https://github.com/ultralytics/yolov5
-    return create('yolov5s6', pretrained, channels, classes, autoshape, verbose)
+    return _create('yolov5s6', pretrained, channels, classes, autoshape, _verbose, device)
 
 
-def yolov5m6(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True):
+def yolov5m6(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
     # YOLOv5-medium-P6 model https://github.com/ultralytics/yolov5
-    return create('yolov5m6', pretrained, channels, classes, autoshape, verbose)
+    return _create('yolov5m6', pretrained, channels, classes, autoshape, _verbose, device)
 
 
-def yolov5l6(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True):
+def yolov5l6(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
     # YOLOv5-large-P6 model https://github.com/ultralytics/yolov5
-    return create('yolov5l6', pretrained, channels, classes, autoshape, verbose)
+    return _create('yolov5l6', pretrained, channels, classes, autoshape, _verbose, device)
 
 
-def yolov5x6(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True):
+def yolov5x6(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
     # YOLOv5-xlarge-P6 model https://github.com/ultralytics/yolov5
-    return create('yolov5x6', pretrained, channels, classes, autoshape, verbose)
+    return _create('yolov5x6', pretrained, channels, classes, autoshape, _verbose, device)
 
 
 if __name__ == '__main__':
-    model = create(name='yolov5s', pretrained=True, channels=3, classes=80, autoshape=True, verbose=True)  # pretrained
-    # model = custom(path_or_model='path/to/model.pt')  # custom
+    import argparse
+    from pathlib import Path
 
-    # Verify inference
-    import cv2
     import numpy as np
     from PIL import Image
 
-    imgs = ['data/images/zidane.jpg',  # filename
-            'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg',  # URI
-            cv2.imread('data/images/bus.jpg')[:, :, ::-1],  # OpenCV
-            Image.open('data/images/bus.jpg'),  # PIL
-            np.zeros((320, 640, 3))]  # numpy
+    from utils.general import cv2, print_args
+
+    # Argparser
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--model', type=str, default='yolov5s', help='model name')
+    opt = parser.parse_args()
+    print_args(vars(opt))
+
+    # Model
+    model = _create(name=opt.model, pretrained=True, channels=3, classes=80, autoshape=True, verbose=True)
+    # model = custom(path='path/to/model.pt')  # custom
+
+    # Images
+    imgs = [
+        'data/images/zidane.jpg',  # filename
+        Path('data/images/zidane.jpg'),  # Path
+        'https://ultralytics.com/images/zidane.jpg',  # URI
+        cv2.imread('data/images/bus.jpg')[:, :, ::-1],  # OpenCV
+        Image.open('data/images/bus.jpg'),  # PIL
+        np.zeros((320, 640, 3))]  # numpy
+
+    # Inference
+    results = model(imgs, size=320)  # batched inference
 
-    results = model(imgs)  # batched inference
+    # Results
     results.print()
     results.save()
diff --git a/models/common.py b/models/common.py
index a28621904b0e..aa8ae674eb47 100644
--- a/models/common.py
+++ b/models/common.py
@@ -1,9 +1,21 @@
-# YOLOv5 common modules
-
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Common modules
+"""
+
+import ast
+import contextlib
+import json
 import math
+import platform
+import warnings
+import zipfile
+from collections import OrderedDict, namedtuple
 from copy import copy
 from pathlib import Path
+from urllib.parse import urlparse
 
+import cv2
 import numpy as np
 import pandas as pd
 import requests
@@ -12,39 +24,53 @@
 from PIL import Image
 from torch.cuda import amp
 
-from utils.datasets import letterbox
-from utils.general import non_max_suppression, make_divisible, scale_coords, increment_path, xyxy2xywh, save_one_box
-from utils.plots import color_list, plot_one_box
-from utils.torch_utils import time_synchronized
+from utils import TryExcept
+from utils.dataloaders import exif_transpose, letterbox
+from utils.general import (LOGGER, ROOT, Profile, check_requirements, check_suffix, check_version, colorstr,
+                           increment_path, is_jupyter, make_divisible, non_max_suppression, scale_boxes, xywh2xyxy,
+                           xyxy2xywh, yaml_load)
+from utils.plots import Annotator, colors, save_one_box
+from utils.torch_utils import copy_attr, smart_inference_mode
 
 
-def autopad(k, p=None):  # kernel, padding
-    # Pad to 'same'
+def autopad(k, p=None, d=1):  # kernel, padding, dilation
+    # Pad to 'same' shape outputs
+    if d > 1:
+        k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]  # actual kernel-size
     if p is None:
         p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
     return p
 
 
-def DWConv(c1, c2, k=1, s=1, act=True):
-    # Depthwise convolution
-    return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
-
-
 class Conv(nn.Module):
-    # Standard convolution
-    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
-        super(Conv, self).__init__()
-        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
+    # Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)
+    default_act = nn.SiLU()  # default activation
+
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
+        super().__init__()
+        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
         self.bn = nn.BatchNorm2d(c2)
-        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())
+        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
 
     def forward(self, x):
         return self.act(self.bn(self.conv(x)))
 
-    def fuseforward(self, x):
+    def forward_fuse(self, x):
         return self.act(self.conv(x))
 
 
+class DWConv(Conv):
+    # Depth-wise convolution
+    def __init__(self, c1, c2, k=1, s=1, d=1, act=True):  # ch_in, ch_out, kernel, stride, dilation, activation
+        super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)
+
+
+class DWConvTranspose2d(nn.ConvTranspose2d):
+    # Depth-wise transpose convolution
+    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):  # ch_in, ch_out, kernel, stride, padding, padding_out
+        super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))
+
+
 class TransformerLayer(nn.Module):
     # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
     def __init__(self, c, num_heads):
@@ -70,31 +96,21 @@ def __init__(self, c1, c2, num_heads, num_layers):
         if c1 != c2:
             self.conv = Conv(c1, c2)
         self.linear = nn.Linear(c2, c2)  # learnable position embedding
-        self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)])
+        self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
         self.c2 = c2
 
     def forward(self, x):
         if self.conv is not None:
             x = self.conv(x)
         b, _, w, h = x.shape
-        p = x.flatten(2)
-        p = p.unsqueeze(0)
-        p = p.transpose(0, 3)
-        p = p.squeeze(3)
-        e = self.linear(p)
-        x = p + e
-
-        x = self.tr(x)
-        x = x.unsqueeze(3)
-        x = x.transpose(0, 3)
-        x = x.reshape(b, self.c2, w, h)
-        return x
+        p = x.flatten(2).permute(2, 0, 1)
+        return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)
 
 
 class Bottleneck(nn.Module):
     # Standard bottleneck
     def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
-        super(Bottleneck, self).__init__()
+        super().__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = Conv(c1, c_, 1, 1)
         self.cv2 = Conv(c_, c2, 3, 1, g=g)
@@ -107,35 +123,56 @@ def forward(self, x):
 class BottleneckCSP(nn.Module):
     # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
-        super(BottleneckCSP, self).__init__()
+        super().__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = Conv(c1, c_, 1, 1)
         self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
         self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
         self.cv4 = Conv(2 * c_, c2, 1, 1)
         self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
-        self.act = nn.LeakyReLU(0.1, inplace=True)
-        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
+        self.act = nn.SiLU()
+        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
 
     def forward(self, x):
         y1 = self.cv3(self.m(self.cv1(x)))
         y2 = self.cv2(x)
-        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
+        return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))
+
+
+class CrossConv(nn.Module):
+    # Cross Convolution Downsample
+    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
+        # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
+        super().__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = Conv(c1, c_, (1, k), (1, s))
+        self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
+        self.add = shortcut and c1 == c2
+
+    def forward(self, x):
+        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
 
 
 class C3(nn.Module):
     # CSP Bottleneck with 3 convolutions
     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
-        super(C3, self).__init__()
+        super().__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = Conv(c1, c_, 1, 1)
         self.cv2 = Conv(c1, c_, 1, 1)
-        self.cv3 = Conv(2 * c_, c2, 1)  # act=FReLU(c2)
-        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
-        # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])
+        self.cv3 = Conv(2 * c_, c2, 1)  # optional act=FReLU(c2)
+        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
 
     def forward(self, x):
-        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))
+        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
+
+
+class C3x(C3):
+    # C3 module with cross-convolutions
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+        super().__init__(c1, c2, n, shortcut, g, e)
+        c_ = int(c2 * e)
+        self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))
 
 
 class C3TR(C3):
@@ -146,10 +183,26 @@ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
         self.m = TransformerBlock(c_, c_, 4, n)
 
 
+class C3SPP(C3):
+    # C3 module with SPP()
+    def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
+        super().__init__(c1, c2, n, shortcut, g, e)
+        c_ = int(c2 * e)
+        self.m = SPP(c_, c_, k)
+
+
+class C3Ghost(C3):
+    # C3 module with GhostBottleneck()
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+        super().__init__(c1, c2, n, shortcut, g, e)
+        c_ = int(c2 * e)  # hidden channels
+        self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))
+
+
 class SPP(nn.Module):
-    # Spatial pyramid pooling layer used in YOLOv3-SPP
+    # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
     def __init__(self, c1, c2, k=(5, 9, 13)):
-        super(SPP, self).__init__()
+        super().__init__()
         c_ = c1 // 2  # hidden channels
         self.cv1 = Conv(c1, c_, 1, 1)
         self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
@@ -157,21 +210,70 @@ def __init__(self, c1, c2, k=(5, 9, 13)):
 
     def forward(self, x):
         x = self.cv1(x)
-        return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
+        with warnings.catch_warnings():
+            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
+            return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
+
+
+class SPPF(nn.Module):
+    # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
+    def __init__(self, c1, c2, k=5):  # equivalent to SPP(k=(5, 9, 13))
+        super().__init__()
+        c_ = c1 // 2  # hidden channels
+        self.cv1 = Conv(c1, c_, 1, 1)
+        self.cv2 = Conv(c_ * 4, c2, 1, 1)
+        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
+
+    def forward(self, x):
+        x = self.cv1(x)
+        with warnings.catch_warnings():
+            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
+            y1 = self.m(x)
+            y2 = self.m(y1)
+            return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
 
 
 class Focus(nn.Module):
     # Focus wh information into c-space
     def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
-        super(Focus, self).__init__()
-        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
+        super().__init__()
+        self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)
         # self.contract = Contract(gain=2)
 
     def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
-        return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
+        return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
         # return self.conv(self.contract(x))
 
 
+class GhostConv(nn.Module):
+    # Ghost Convolution https://github.com/huawei-noah/ghostnet
+    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
+        super().__init__()
+        c_ = c2 // 2  # hidden channels
+        self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
+        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act=act)
+
+    def forward(self, x):
+        y = self.cv1(x)
+        return torch.cat((y, self.cv2(y)), 1)
+
+
+class GhostBottleneck(nn.Module):
+    # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
+    def __init__(self, c1, c2, k=3, s=1):  # ch_in, ch_out, kernel, stride
+        super().__init__()
+        c_ = c2 // 2
+        self.conv = nn.Sequential(
+            GhostConv(c1, c_, 1, 1),  # pw
+            DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(),  # dw
+            GhostConv(c_, c2, 1, 1, act=False))  # pw-linear
+        self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1,
+                                                                            act=False)) if s == 2 else nn.Identity()
+
+    def forward(self, x):
+        return self.conv(x) + self.shortcut(x)
+
+
 class Contract(nn.Module):
     # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
     def __init__(self, gain=2):
@@ -179,11 +281,11 @@ def __init__(self, gain=2):
         self.gain = gain
 
     def forward(self, x):
-        N, C, H, W = x.size()  # assert (H / s == 0) and (W / s == 0), 'Indivisible gain'
+        b, c, h, w = x.size()  # assert (h / s == 0) and (W / s == 0), 'Indivisible gain'
         s = self.gain
-        x = x.view(N, C, H // s, s, W // s, s)  # x(1,64,40,2,40,2)
+        x = x.view(b, c, h // s, s, w // s, s)  # x(1,64,40,2,40,2)
         x = x.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
-        return x.view(N, C * s * s, H // s, W // s)  # x(1,256,40,40)
+        return x.view(b, c * s * s, h // s, w // s)  # x(1,256,40,40)
 
 
 class Expand(nn.Module):
@@ -193,171 +295,512 @@ def __init__(self, gain=2):
         self.gain = gain
 
     def forward(self, x):
-        N, C, H, W = x.size()  # assert C / s ** 2 == 0, 'Indivisible gain'
+        b, c, h, w = x.size()  # assert C / s ** 2 == 0, 'Indivisible gain'
         s = self.gain
-        x = x.view(N, s, s, C // s ** 2, H, W)  # x(1,2,2,16,80,80)
+        x = x.view(b, s, s, c // s ** 2, h, w)  # x(1,2,2,16,80,80)
         x = x.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
-        return x.view(N, C // s ** 2, H * s, W * s)  # x(1,16,160,160)
+        return x.view(b, c // s ** 2, h * s, w * s)  # x(1,16,160,160)
 
 
 class Concat(nn.Module):
     # Concatenate a list of tensors along dimension
     def __init__(self, dimension=1):
-        super(Concat, self).__init__()
+        super().__init__()
         self.d = dimension
 
     def forward(self, x):
         return torch.cat(x, self.d)
 
 
-class NMS(nn.Module):
-    # Non-Maximum Suppression (NMS) module
-    conf = 0.25  # confidence threshold
-    iou = 0.45  # IoU threshold
-    classes = None  # (optional list) filter by class
+class DetectMultiBackend(nn.Module):
+    # YOLOv5 MultiBackend class for python inference on various backends
+    def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True):
+        # Usage:
+        #   PyTorch:              weights = *.pt
+        #   TorchScript:                    *.torchscript
+        #   ONNX Runtime:                   *.onnx
+        #   ONNX OpenCV DNN:                *.onnx --dnn
+        #   OpenVINO:                       *_openvino_model
+        #   CoreML:                         *.mlmodel
+        #   TensorRT:                       *.engine
+        #   TensorFlow SavedModel:          *_saved_model
+        #   TensorFlow GraphDef:            *.pb
+        #   TensorFlow Lite:                *.tflite
+        #   TensorFlow Edge TPU:            *_edgetpu.tflite
+        #   PaddlePaddle:                   *_paddle_model
+        from models.experimental import attempt_download, attempt_load  # scoped to avoid circular import
 
-    def __init__(self):
-        super(NMS, self).__init__()
-
-    def forward(self, x):
-        return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)
-
-
-class autoShape(nn.Module):
-    # input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
+        super().__init__()
+        w = str(weights[0] if isinstance(weights, list) else weights)
+        pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type(w)
+        fp16 &= pt or jit or onnx or engine  # FP16
+        nhwc = coreml or saved_model or pb or tflite or edgetpu  # BHWC formats (vs torch BCWH)
+        stride = 32  # default stride
+        cuda = torch.cuda.is_available() and device.type != 'cpu'  # use CUDA
+        if not (pt or triton):
+            w = attempt_download(w)  # download if not local
+
+        if pt:  # PyTorch
+            model = attempt_load(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse)
+            stride = max(int(model.stride.max()), 32)  # model stride
+            names = model.module.names if hasattr(model, 'module') else model.names  # get class names
+            model.half() if fp16 else model.float()
+            self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
+        elif jit:  # TorchScript
+            LOGGER.info(f'Loading {w} for TorchScript inference...')
+            extra_files = {'config.txt': ''}  # model metadata
+            model = torch.jit.load(w, _extra_files=extra_files, map_location=device)
+            model.half() if fp16 else model.float()
+            if extra_files['config.txt']:  # load metadata dict
+                d = json.loads(extra_files['config.txt'],
+                               object_hook=lambda d: {int(k) if k.isdigit() else k: v
+                                                      for k, v in d.items()})
+                stride, names = int(d['stride']), d['names']
+        elif dnn:  # ONNX OpenCV DNN
+            LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
+            check_requirements('opencv-python>=4.5.4')
+            net = cv2.dnn.readNetFromONNX(w)
+        elif onnx:  # ONNX Runtime
+            LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
+            check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
+            import onnxruntime
+            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
+            session = onnxruntime.InferenceSession(w, providers=providers)
+            output_names = [x.name for x in session.get_outputs()]
+            meta = session.get_modelmeta().custom_metadata_map  # metadata
+            if 'stride' in meta:
+                stride, names = int(meta['stride']), eval(meta['names'])
+        elif xml:  # OpenVINO
+            LOGGER.info(f'Loading {w} for OpenVINO inference...')
+            check_requirements('openvino')  # requires openvino-dev: https://pypi.org/project/openvino-dev/
+            from openvino.runtime import Core, Layout, get_batch
+            ie = Core()
+            if not Path(w).is_file():  # if not *.xml
+                w = next(Path(w).glob('*.xml'))  # get *.xml file from *_openvino_model dir
+            network = ie.read_model(model=w, weights=Path(w).with_suffix('.bin'))
+            if network.get_parameters()[0].get_layout().empty:
+                network.get_parameters()[0].set_layout(Layout('NCHW'))
+            batch_dim = get_batch(network)
+            if batch_dim.is_static:
+                batch_size = batch_dim.get_length()
+            executable_network = ie.compile_model(network, device_name='CPU')  # device_name="MYRIAD" for Intel NCS2
+            stride, names = self._load_metadata(Path(w).with_suffix('.yaml'))  # load metadata
+        elif engine:  # TensorRT
+            LOGGER.info(f'Loading {w} for TensorRT inference...')
+            import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download
+            check_version(trt.__version__, '7.0.0', hard=True)  # require tensorrt>=7.0.0
+            if device.type == 'cpu':
+                device = torch.device('cuda:0')
+            Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
+            logger = trt.Logger(trt.Logger.INFO)
+            with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
+                model = runtime.deserialize_cuda_engine(f.read())
+            context = model.create_execution_context()
+            bindings = OrderedDict()
+            output_names = []
+            fp16 = False  # default updated below
+            dynamic = False
+            for i in range(model.num_bindings):
+                name = model.get_binding_name(i)
+                dtype = trt.nptype(model.get_binding_dtype(i))
+                if model.binding_is_input(i):
+                    if -1 in tuple(model.get_binding_shape(i)):  # dynamic
+                        dynamic = True
+                        context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[2]))
+                    if dtype == np.float16:
+                        fp16 = True
+                else:  # output
+                    output_names.append(name)
+                shape = tuple(context.get_binding_shape(i))
+                im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
+                bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
+            binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
+            batch_size = bindings['images'].shape[0]  # if dynamic, this is instead max batch size
+        elif coreml:  # CoreML
+            LOGGER.info(f'Loading {w} for CoreML inference...')
+            import coremltools as ct
+            model = ct.models.MLModel(w)
+        elif saved_model:  # TF SavedModel
+            LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...')
+            import tensorflow as tf
+            keras = False  # assume TF1 saved_model
+            model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
+        elif pb:  # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
+            LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...')
+            import tensorflow as tf
+
+            def wrap_frozen_graph(gd, inputs, outputs):
+                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=''), [])  # wrapped
+                ge = x.graph.as_graph_element
+                return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))
+
+            def gd_outputs(gd):
+                name_list, input_list = [], []
+                for node in gd.node:  # tensorflow.core.framework.node_def_pb2.NodeDef
+                    name_list.append(node.name)
+                    input_list.extend(node.input)
+                return sorted(f'{x}:0' for x in list(set(name_list) - set(input_list)) if not x.startswith('NoOp'))
+
+            gd = tf.Graph().as_graph_def()  # TF GraphDef
+            with open(w, 'rb') as f:
+                gd.ParseFromString(f.read())
+            frozen_func = wrap_frozen_graph(gd, inputs='x:0', outputs=gd_outputs(gd))
+        elif tflite or edgetpu:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
+            try:  # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
+                from tflite_runtime.interpreter import Interpreter, load_delegate
+            except ImportError:
+                import tensorflow as tf
+                Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate,
+            if edgetpu:  # TF Edge TPU https://coral.ai/software/#edgetpu-runtime
+                LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
+                delegate = {
+                    'Linux': 'libedgetpu.so.1',
+                    'Darwin': 'libedgetpu.1.dylib',
+                    'Windows': 'edgetpu.dll'}[platform.system()]
+                interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
+            else:  # TFLite
+                LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
+                interpreter = Interpreter(model_path=w)  # load TFLite model
+            interpreter.allocate_tensors()  # allocate
+            input_details = interpreter.get_input_details()  # inputs
+            output_details = interpreter.get_output_details()  # outputs
+            # load metadata
+            with contextlib.suppress(zipfile.BadZipFile):
+                with zipfile.ZipFile(w, 'r') as model:
+                    meta_file = model.namelist()[0]
+                    meta = ast.literal_eval(model.read(meta_file).decode('utf-8'))
+                    stride, names = int(meta['stride']), meta['names']
+        elif tfjs:  # TF.js
+            raise NotImplementedError('ERROR: YOLOv5 TF.js inference is not supported')
+        elif paddle:  # PaddlePaddle
+            LOGGER.info(f'Loading {w} for PaddlePaddle inference...')
+            check_requirements('paddlepaddle-gpu' if cuda else 'paddlepaddle')
+            import paddle.inference as pdi
+            if not Path(w).is_file():  # if not *.pdmodel
+                w = next(Path(w).rglob('*.pdmodel'))  # get *.pdmodel file from *_paddle_model dir
+            weights = Path(w).with_suffix('.pdiparams')
+            config = pdi.Config(str(w), str(weights))
+            if cuda:
+                config.enable_use_gpu(memory_pool_init_size_mb=2048, device_id=0)
+            predictor = pdi.create_predictor(config)
+            input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
+            output_names = predictor.get_output_names()
+        elif triton:  # NVIDIA Triton Inference Server
+            LOGGER.info(f'Using {w} as Triton Inference Server...')
+            check_requirements('tritonclient[all]')
+            from utils.triton import TritonRemoteModel
+            model = TritonRemoteModel(url=w)
+            nhwc = model.runtime.startswith('tensorflow')
+        else:
+            raise NotImplementedError(f'ERROR: {w} is not a supported format')
+
+        # class names
+        if 'names' not in locals():
+            names = yaml_load(data)['names'] if data else {i: f'class{i}' for i in range(999)}
+        if names[0] == 'n01440764' and len(names) == 1000:  # ImageNet
+            names = yaml_load(ROOT / 'data/ImageNet.yaml')['names']  # human-readable names
+
+        self.__dict__.update(locals())  # assign all variables to self
+
+    def forward(self, im, augment=False, visualize=False):
+        # YOLOv5 MultiBackend inference
+        b, ch, h, w = im.shape  # batch, channel, height, width
+        if self.fp16 and im.dtype != torch.float16:
+            im = im.half()  # to FP16
+        if self.nhwc:
+            im = im.permute(0, 2, 3, 1)  # torch BCHW to numpy BHWC shape(1,320,192,3)
+
+        if self.pt:  # PyTorch
+            y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
+        elif self.jit:  # TorchScript
+            y = self.model(im)
+        elif self.dnn:  # ONNX OpenCV DNN
+            im = im.cpu().numpy()  # torch to numpy
+            self.net.setInput(im)
+            y = self.net.forward()
+        elif self.onnx:  # ONNX Runtime
+            im = im.cpu().numpy()  # torch to numpy
+            y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
+        elif self.xml:  # OpenVINO
+            im = im.cpu().numpy()  # FP32
+            y = list(self.executable_network([im]).values())
+        elif self.engine:  # TensorRT
+            if self.dynamic and im.shape != self.bindings['images'].shape:
+                i = self.model.get_binding_index('images')
+                self.context.set_binding_shape(i, im.shape)  # reshape if dynamic
+                self.bindings['images'] = self.bindings['images']._replace(shape=im.shape)
+                for name in self.output_names:
+                    i = self.model.get_binding_index(name)
+                    self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i)))
+            s = self.bindings['images'].shape
+            assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
+            self.binding_addrs['images'] = int(im.data_ptr())
+            self.context.execute_v2(list(self.binding_addrs.values()))
+            y = [self.bindings[x].data for x in sorted(self.output_names)]
+        elif self.coreml:  # CoreML
+            im = im.cpu().numpy()
+            im = Image.fromarray((im[0] * 255).astype('uint8'))
+            # im = im.resize((192, 320), Image.ANTIALIAS)
+            y = self.model.predict({'image': im})  # coordinates are xywh normalized
+            if 'confidence' in y:
+                box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
+                conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float)
+                y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
+            else:
+                y = list(reversed(y.values()))  # reversed for segmentation models (pred, proto)
+        elif self.paddle:  # PaddlePaddle
+            im = im.cpu().numpy().astype(np.float32)
+            self.input_handle.copy_from_cpu(im)
+            self.predictor.run()
+            y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names]
+        elif self.triton:  # NVIDIA Triton Inference Server
+            y = self.model(im)
+        else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
+            im = im.cpu().numpy()
+            if self.saved_model:  # SavedModel
+                y = self.model(im, training=False) if self.keras else self.model(im)
+            elif self.pb:  # GraphDef
+                y = self.frozen_func(x=self.tf.constant(im))
+            else:  # Lite or Edge TPU
+                input = self.input_details[0]
+                int8 = input['dtype'] == np.uint8  # is TFLite quantized uint8 model
+                if int8:
+                    scale, zero_point = input['quantization']
+                    im = (im / scale + zero_point).astype(np.uint8)  # de-scale
+                self.interpreter.set_tensor(input['index'], im)
+                self.interpreter.invoke()
+                y = []
+                for output in self.output_details:
+                    x = self.interpreter.get_tensor(output['index'])
+                    if int8:
+                        scale, zero_point = output['quantization']
+                        x = (x.astype(np.float32) - zero_point) * scale  # re-scale
+                    y.append(x)
+            y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]
+            y[0][..., :4] *= [w, h, w, h]  # xywh normalized to pixels
+
+        if isinstance(y, (list, tuple)):
+            return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
+        else:
+            return self.from_numpy(y)
+
+    def from_numpy(self, x):
+        return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x
+
+    def warmup(self, imgsz=(1, 3, 640, 640)):
+        # Warmup model by running inference once
+        warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton
+        if any(warmup_types) and (self.device.type != 'cpu' or self.triton):
+            im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device)  # input
+            for _ in range(2 if self.jit else 1):  #
+                self.forward(im)  # warmup
+
+    @staticmethod
+    def _model_type(p='path/to/model.pt'):
+        # Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx
+        # types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle]
+        from export import export_formats
+        from utils.downloads import is_url
+        sf = list(export_formats().Suffix)  # export suffixes
+        if not is_url(p, check=False):
+            check_suffix(p, sf)  # checks
+        url = urlparse(p)  # if url may be Triton inference server
+        types = [s in Path(p).name for s in sf]
+        types[8] &= not types[9]  # tflite &= not edgetpu
+        triton = not any(types) and all([any(s in url.scheme for s in ['http', 'grpc']), url.netloc])
+        return types + [triton]
+
+    @staticmethod
+    def _load_metadata(f=Path('path/to/meta.yaml')):
+        # Load metadata from meta.yaml if it exists
+        if f.exists():
+            d = yaml_load(f)
+            return d['stride'], d['names']  # assign stride, names
+        return None, None
+
+
+class AutoShape(nn.Module):
+    # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
     conf = 0.25  # NMS confidence threshold
     iou = 0.45  # NMS IoU threshold
-    classes = None  # (optional list) filter by class
+    agnostic = False  # NMS class-agnostic
+    multi_label = False  # NMS multiple labels per box
+    classes = None  # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
+    max_det = 1000  # maximum number of detections per image
+    amp = False  # Automatic Mixed Precision (AMP) inference
 
-    def __init__(self, model):
-        super(autoShape, self).__init__()
+    def __init__(self, model, verbose=True):
+        super().__init__()
+        if verbose:
+            LOGGER.info('Adding AutoShape... ')
+        copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=())  # copy attributes
+        self.dmb = isinstance(model, DetectMultiBackend)  # DetectMultiBackend() instance
+        self.pt = not self.dmb or model.pt  # PyTorch model
         self.model = model.eval()
-
-    def autoshape(self):
-        print('autoShape already enabled, skipping... ')  # model already converted to model.autoshape()
+        if self.pt:
+            m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
+            m.inplace = False  # Detect.inplace=False for safe multithread inference
+            m.export = True  # do not output loss values
+
+    def _apply(self, fn):
+        # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
+        self = super()._apply(fn)
+        if self.pt:
+            m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
+            m.stride = fn(m.stride)
+            m.grid = list(map(fn, m.grid))
+            if isinstance(m.anchor_grid, list):
+                m.anchor_grid = list(map(fn, m.anchor_grid))
         return self
 
-    @torch.no_grad()
-    def forward(self, imgs, size=640, augment=False, profile=False):
-        # Inference from various sources. For height=640, width=1280, RGB images example inputs are:
-        #   filename:   imgs = 'data/images/zidane.jpg'
-        #   URI:             = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg'
+    @smart_inference_mode()
+    def forward(self, ims, size=640, augment=False, profile=False):
+        # Inference from various sources. For size(height=640, width=1280), RGB images example inputs are:
+        #   file:        ims = 'data/images/zidane.jpg'  # str or PosixPath
+        #   URI:             = 'https://ultralytics.com/images/zidane.jpg'
         #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
-        #   PIL:             = Image.open('image.jpg')  # HWC x(640,1280,3)
+        #   PIL:             = Image.open('image.jpg') or ImageGrab.grab()  # HWC x(640,1280,3)
         #   numpy:           = np.zeros((640,1280,3))  # HWC
         #   torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
         #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images
 
-        t = [time_synchronized()]
-        p = next(self.model.parameters())  # for device and type
-        if isinstance(imgs, torch.Tensor):  # torch
-            with amp.autocast(enabled=p.device.type != 'cpu'):
-                return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference
-
-        # Pre-process
-        n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
-        shape0, shape1, files = [], [], []  # image and inference shapes, filenames
-        for i, im in enumerate(imgs):
-            f = f'image{i}'  # filename
-            if isinstance(im, str):  # filename or uri
-                im, f = np.asarray(Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im)), im
-            elif isinstance(im, Image.Image):  # PIL Image
-                im, f = np.asarray(im), getattr(im, 'filename', f) or f
-            files.append(Path(f).with_suffix('.jpg').name)
-            if im.shape[0] < 5:  # image in CHW
-                im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
-            im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3)  # enforce 3ch input
-            s = im.shape[:2]  # HWC
-            shape0.append(s)  # image shape
-            g = (size / max(s))  # gain
-            shape1.append([y * g for y in s])
-            imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
-        shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
-        x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
-        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
-        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
-        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
-        t.append(time_synchronized())
-
-        with amp.autocast(enabled=p.device.type != 'cpu'):
+        dt = (Profile(), Profile(), Profile())
+        with dt[0]:
+            if isinstance(size, int):  # expand
+                size = (size, size)
+            p = next(self.model.parameters()) if self.pt else torch.empty(1, device=self.model.device)  # param
+            autocast = self.amp and (p.device.type != 'cpu')  # Automatic Mixed Precision (AMP) inference
+            if isinstance(ims, torch.Tensor):  # torch
+                with amp.autocast(autocast):
+                    return self.model(ims.to(p.device).type_as(p), augment=augment)  # inference
+
+            # Pre-process
+            n, ims = (len(ims), list(ims)) if isinstance(ims, (list, tuple)) else (1, [ims])  # number, list of images
+            shape0, shape1, files = [], [], []  # image and inference shapes, filenames
+            for i, im in enumerate(ims):
+                f = f'image{i}'  # filename
+                if isinstance(im, (str, Path)):  # filename or uri
+                    im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
+                    im = np.asarray(exif_transpose(im))
+                elif isinstance(im, Image.Image):  # PIL Image
+                    im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
+                files.append(Path(f).with_suffix('.jpg').name)
+                if im.shape[0] < 5:  # image in CHW
+                    im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
+                im = im[..., :3] if im.ndim == 3 else cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)  # enforce 3ch input
+                s = im.shape[:2]  # HWC
+                shape0.append(s)  # image shape
+                g = max(size) / max(s)  # gain
+                shape1.append([int(y * g) for y in s])
+                ims[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
+            shape1 = [make_divisible(x, self.stride) for x in np.array(shape1).max(0)]  # inf shape
+            x = [letterbox(im, shape1, auto=False)[0] for im in ims]  # pad
+            x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2)))  # stack and BHWC to BCHW
+            x = torch.from_numpy(x).to(p.device).type_as(p) / 255  # uint8 to fp16/32
+
+        with amp.autocast(autocast):
             # Inference
-            y = self.model(x, augment, profile)[0]  # forward
-            t.append(time_synchronized())
+            with dt[1]:
+                y = self.model(x, augment=augment)  # forward
 
             # Post-process
-            y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS
-            for i in range(n):
-                scale_coords(shape1, y[i][:, :4], shape0[i])
+            with dt[2]:
+                y = non_max_suppression(y if self.dmb else y[0],
+                                        self.conf,
+                                        self.iou,
+                                        self.classes,
+                                        self.agnostic,
+                                        self.multi_label,
+                                        max_det=self.max_det)  # NMS
+                for i in range(n):
+                    scale_boxes(shape1, y[i][:, :4], shape0[i])
 
-            t.append(time_synchronized())
-            return Detections(imgs, y, files, t, self.names, x.shape)
+            return Detections(ims, y, files, dt, self.names, x.shape)
 
 
 class Detections:
-    # detections class for YOLOv5 inference results
-    def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
-        super(Detections, self).__init__()
+    # YOLOv5 detections class for inference results
+    def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
+        super().__init__()
         d = pred[0].device  # device
-        gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs]  # normalizations
-        self.imgs = imgs  # list of images as numpy arrays
+        gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims]  # normalizations
+        self.ims = ims  # list of images as numpy arrays
         self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
         self.names = names  # class names
         self.files = files  # image filenames
+        self.times = times  # profiling times
         self.xyxy = pred  # xyxy pixels
         self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
         self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
         self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
         self.n = len(self.pred)  # number of images (batch size)
-        self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3))  # timestamps (ms)
-        self.s = shape  # inference BCHW shape
-
-    def display(self, pprint=False, show=False, save=False, crop=False, render=False, save_dir=Path('')):
-        colors = color_list()
-        for i, (im, pred) in enumerate(zip(self.imgs, self.pred)):
-            str = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '
-            if pred is not None:
+        self.t = tuple(x.t / self.n * 1E3 for x in times)  # timestamps (ms)
+        self.s = tuple(shape)  # inference BCHW shape
+
+    def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')):
+        s, crops = '', []
+        for i, (im, pred) in enumerate(zip(self.ims, self.pred)):
+            s += f'\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '  # string
+            if pred.shape[0]:
                 for c in pred[:, -1].unique():
                     n = (pred[:, -1] == c).sum()  # detections per class
-                    str += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
+                    s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
+                s = s.rstrip(', ')
                 if show or save or render or crop:
-                    for *box, conf, cls in pred:  # xyxy, confidence, class
+                    annotator = Annotator(im, example=str(self.names))
+                    for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
                         label = f'{self.names[int(cls)]} {conf:.2f}'
                         if crop:
-                            save_one_box(box, im, file=save_dir / 'crops' / self.names[int(cls)] / self.files[i])
+                            file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
+                            crops.append({
+                                'box': box,
+                                'conf': conf,
+                                'cls': cls,
+                                'label': label,
+                                'im': save_one_box(box, im, file=file, save=save)})
                         else:  # all others
-                            plot_one_box(box, im, label=label, color=colors[int(cls) % 10])
+                            annotator.box_label(box, label if labels else '', color=colors(cls))
+                    im = annotator.im
+            else:
+                s += '(no detections)'
 
             im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
-            if pprint:
-                print(str.rstrip(', '))
             if show:
-                im.show(self.files[i])  # show
+                if is_jupyter():
+                    from IPython.display import display
+                    display(im)
+                else:
+                    im.show(self.files[i])
             if save:
                 f = self.files[i]
                 im.save(save_dir / f)  # save
-                print(f"{'Saved' * (i == 0)} {f}", end=',' if i < self.n - 1 else f' to {save_dir}\n')
+                if i == self.n - 1:
+                    LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
             if render:
-                self.imgs[i] = np.asarray(im)
-
-    def print(self):
-        self.display(pprint=True)  # print results
-        print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % self.t)
+                self.ims[i] = np.asarray(im)
+        if pprint:
+            s = s.lstrip('\n')
+            return f'{s}\nSpeed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {self.s}' % self.t
+        if crop:
+            if save:
+                LOGGER.info(f'Saved results to {save_dir}\n')
+            return crops
 
-    def show(self):
-        self.display(show=True)  # show results
+    @TryExcept('Showing images is not supported in this environment')
+    def show(self, labels=True):
+        self._run(show=True, labels=labels)  # show results
 
-    def save(self, save_dir='runs/hub/exp'):
-        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/hub/exp', mkdir=True)  # increment save_dir
-        self.display(save=True, save_dir=save_dir)  # save results
+    def save(self, labels=True, save_dir='runs/detect/exp', exist_ok=False):
+        save_dir = increment_path(save_dir, exist_ok, mkdir=True)  # increment save_dir
+        self._run(save=True, labels=labels, save_dir=save_dir)  # save results
 
-    def crop(self, save_dir='runs/hub/exp'):
-        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/hub/exp', mkdir=True)  # increment save_dir
-        self.display(crop=True, save_dir=save_dir)  # crop results
-        print(f'Saved results to {save_dir}\n')
+    def crop(self, save=True, save_dir='runs/detect/exp', exist_ok=False):
+        save_dir = increment_path(save_dir, exist_ok, mkdir=True) if save else None
+        return self._run(crop=True, save=save, save_dir=save_dir)  # crop results
 
-    def render(self):
-        self.display(render=True)  # render results
-        return self.imgs
+    def render(self, labels=True):
+        self._run(render=True, labels=labels)  # render results
+        return self.ims
 
     def pandas(self):
         # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
@@ -371,24 +814,57 @@ def pandas(self):
 
     def tolist(self):
         # return a list of Detections objects, i.e. 'for result in results.tolist():'
-        x = [Detections([self.imgs[i]], [self.pred[i]], self.names, self.s) for i in range(self.n)]
-        for d in x:
-            for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
-                setattr(d, k, getattr(d, k)[0])  # pop out of list
+        r = range(self.n)  # iterable
+        x = [Detections([self.ims[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
+        # for d in x:
+        #    for k in ['ims', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
+        #        setattr(d, k, getattr(d, k)[0])  # pop out of list
         return x
 
-    def __len__(self):
+    def print(self):
+        LOGGER.info(self.__str__())
+
+    def __len__(self):  # override len(results)
         return self.n
 
+    def __str__(self):  # override print(results)
+        return self._run(pprint=True)  # print results
+
+    def __repr__(self):
+        return f'YOLOv5 {self.__class__} instance\n' + self.__str__()
+
+
+class Proto(nn.Module):
+    # YOLOv5 mask Proto module for segmentation models
+    def __init__(self, c1, c_=256, c2=32):  # ch_in, number of protos, number of masks
+        super().__init__()
+        self.cv1 = Conv(c1, c_, k=3)
+        self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
+        self.cv2 = Conv(c_, c_, k=3)
+        self.cv3 = Conv(c_, c2)
+
+    def forward(self, x):
+        return self.cv3(self.cv2(self.upsample(self.cv1(x))))
+
 
 class Classify(nn.Module):
-    # Classification head, i.e. x(b,c1,20,20) to x(b,c2)
-    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
-        super(Classify, self).__init__()
-        self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
-        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g)  # to x(b,c2,1,1)
-        self.flat = nn.Flatten()
+    # YOLOv5 classification head, i.e. x(b,c1,20,20) to x(b,c2)
+    def __init__(self,
+                 c1,
+                 c2,
+                 k=1,
+                 s=1,
+                 p=None,
+                 g=1,
+                 dropout_p=0.0):  # ch_in, ch_out, kernel, stride, padding, groups, dropout probability
+        super().__init__()
+        c_ = 1280  # efficientnet_b0 size
+        self.conv = Conv(c1, c_, k, s, autopad(k, p), g)
+        self.pool = nn.AdaptiveAvgPool2d(1)  # to x(b,c_,1,1)
+        self.drop = nn.Dropout(p=dropout_p, inplace=True)
+        self.linear = nn.Linear(c_, c2)  # to x(b,c2)
 
     def forward(self, x):
-        z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1)  # cat if list
-        return self.flat(self.conv(z))  # flatten to x(b,c2)
+        if isinstance(x, list):
+            x = torch.cat(x, 1)
+        return self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))
diff --git a/models/experimental.py b/models/experimental.py
index 548353c93be0..02d35b9ebd11 100644
--- a/models/experimental.py
+++ b/models/experimental.py
@@ -1,35 +1,24 @@
-# YOLOv5 experimental modules
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Experimental modules
+"""
+import math
 
 import numpy as np
 import torch
 import torch.nn as nn
 
-from models.common import Conv, DWConv
-from utils.google_utils import attempt_download
-
-
-class CrossConv(nn.Module):
-    # Cross Convolution Downsample
-    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
-        # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
-        super(CrossConv, self).__init__()
-        c_ = int(c2 * e)  # hidden channels
-        self.cv1 = Conv(c1, c_, (1, k), (1, s))
-        self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
-        self.add = shortcut and c1 == c2
-
-    def forward(self, x):
-        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
+from utils.downloads import attempt_download
 
 
 class Sum(nn.Module):
     # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
     def __init__(self, n, weight=False):  # n: number of inputs
-        super(Sum, self).__init__()
+        super().__init__()
         self.weight = weight  # apply weights boolean
         self.iter = range(n - 1)  # iter object
         if weight:
-            self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True)  # layer weights
+            self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True)  # layer weights
 
     def forward(self, x):
         y = x[0]  # no weight
@@ -43,92 +32,80 @@ def forward(self, x):
         return y
 
 
-class GhostConv(nn.Module):
-    # Ghost Convolution https://github.com/huawei-noah/ghostnet
-    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
-        super(GhostConv, self).__init__()
-        c_ = c2 // 2  # hidden channels
-        self.cv1 = Conv(c1, c_, k, s, None, g, act)
-        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)
-
-    def forward(self, x):
-        y = self.cv1(x)
-        return torch.cat([y, self.cv2(y)], 1)
-
-
-class GhostBottleneck(nn.Module):
-    # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
-    def __init__(self, c1, c2, k=3, s=1):  # ch_in, ch_out, kernel, stride
-        super(GhostBottleneck, self).__init__()
-        c_ = c2 // 2
-        self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1),  # pw
-                                  DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(),  # dw
-                                  GhostConv(c_, c2, 1, 1, act=False))  # pw-linear
-        self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
-                                      Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()
-
-    def forward(self, x):
-        return self.conv(x) + self.shortcut(x)
-
-
 class MixConv2d(nn.Module):
-    # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
-    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
-        super(MixConv2d, self).__init__()
-        groups = len(k)
+    # Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595
+    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):  # ch_in, ch_out, kernel, stride, ch_strategy
+        super().__init__()
+        n = len(k)  # number of convolutions
         if equal_ch:  # equal c_ per group
-            i = torch.linspace(0, groups - 1E-6, c2).floor()  # c2 indices
-            c_ = [(i == g).sum() for g in range(groups)]  # intermediate channels
+            i = torch.linspace(0, n - 1E-6, c2).floor()  # c2 indices
+            c_ = [(i == g).sum() for g in range(n)]  # intermediate channels
         else:  # equal weight.numel() per group
-            b = [c2] + [0] * groups
-            a = np.eye(groups + 1, groups, k=-1)
+            b = [c2] + [0] * n
+            a = np.eye(n + 1, n, k=-1)
             a -= np.roll(a, 1, axis=1)
             a *= np.array(k) ** 2
             a[0] = 1
             c_ = np.linalg.lstsq(a, b, rcond=None)[0].round()  # solve for equal weight indices, ax = b
 
-        self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
+        self.m = nn.ModuleList([
+            nn.Conv2d(c1, int(c_), k, s, k // 2, groups=math.gcd(c1, int(c_)), bias=False) for k, c_ in zip(k, c_)])
         self.bn = nn.BatchNorm2d(c2)
-        self.act = nn.LeakyReLU(0.1, inplace=True)
+        self.act = nn.SiLU()
 
     def forward(self, x):
-        return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
+        return self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
 
 
 class Ensemble(nn.ModuleList):
     # Ensemble of models
     def __init__(self):
-        super(Ensemble, self).__init__()
+        super().__init__()
 
-    def forward(self, x, augment=False):
-        y = []
-        for module in self:
-            y.append(module(x, augment)[0])
+    def forward(self, x, augment=False, profile=False, visualize=False):
+        y = [module(x, augment, profile, visualize)[0] for module in self]
         # y = torch.stack(y).max(0)[0]  # max ensemble
         # y = torch.stack(y).mean(0)  # mean ensemble
         y = torch.cat(y, 1)  # nms ensemble
         return y, None  # inference, train output
 
 
-def attempt_load(weights, map_location=None):
+def attempt_load(weights, device=None, inplace=True, fuse=True):
     # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
+    from models.yolo import Detect, Model
+
     model = Ensemble()
     for w in weights if isinstance(weights, list) else [weights]:
-        attempt_download(w)
-        ckpt = torch.load(w, map_location=map_location)  # load
-        model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().fuse().eval())  # FP32 model
+        ckpt = torch.load(attempt_download(w), map_location='cpu')  # load
+        ckpt = (ckpt.get('ema') or ckpt['model']).to(device).float()  # FP32 model
 
-    # Compatibility updates
-    for m in model.modules():
-        if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
-            m.inplace = True  # pytorch 1.7.0 compatibility
-        elif type(m) is Conv:
-            m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
+        # Model compatibility updates
+        if not hasattr(ckpt, 'stride'):
+            ckpt.stride = torch.tensor([32.])
+        if hasattr(ckpt, 'names') and isinstance(ckpt.names, (list, tuple)):
+            ckpt.names = dict(enumerate(ckpt.names))  # convert to dict
 
+        model.append(ckpt.fuse().eval() if fuse and hasattr(ckpt, 'fuse') else ckpt.eval())  # model in eval mode
+
+    # Module compatibility updates
+    for m in model.modules():
+        t = type(m)
+        if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model):
+            m.inplace = inplace  # torch 1.7.0 compatibility
+            if t is Detect and not isinstance(m.anchor_grid, list):
+                delattr(m, 'anchor_grid')
+                setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl)
+        elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'):
+            m.recompute_scale_factor = None  # torch 1.11.0 compatibility
+
+    # Return model
     if len(model) == 1:
-        return model[-1]  # return model
-    else:
-        print('Ensemble created with %s\n' % weights)
-        for k in ['names', 'stride']:
-            setattr(model, k, getattr(model[-1], k))
-        return model  # return ensemble
+        return model[-1]
+
+    # Return detection ensemble
+    print(f'Ensemble created with {weights}\n')
+    for k in 'names', 'nc', 'yaml':
+        setattr(model, k, getattr(model[0], k))
+    model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride  # max stride
+    assert all(model[0].nc == m.nc for m in model), f'Models have different class counts: {[m.nc for m in model]}'
+    return model
diff --git a/models/export.py b/models/export.py
deleted file mode 100644
index da15079149a1..000000000000
--- a/models/export.py
+++ /dev/null
@@ -1,127 +0,0 @@
-"""Exports a YOLOv5 *.pt model to ONNX and TorchScript formats
-
-Usage:
-    $ export PYTHONPATH="$PWD" && python models/export.py --weights yolov5s.pt --img 640 --batch 1
-"""
-
-import argparse
-import sys
-import time
-from pathlib import Path
-
-sys.path.append(Path(__file__).parent.parent.absolute().__str__())  # to run '$ python *.py' files in subdirectories
-
-import torch
-import torch.nn as nn
-from torch.utils.mobile_optimizer import optimize_for_mobile
-
-import models
-from models.experimental import attempt_load
-from utils.activations import Hardswish, SiLU
-from utils.general import colorstr, check_img_size, check_requirements, file_size, set_logging
-from utils.torch_utils import select_device
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path')
-    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')  # height, width
-    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
-    parser.add_argument('--grid', action='store_true', help='export Detect() layer grid')
-    parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
-    parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX axes')  # ONNX-only
-    parser.add_argument('--simplify', action='store_true', help='simplify ONNX model')  # ONNX-only
-    opt = parser.parse_args()
-    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand
-    print(opt)
-    set_logging()
-    t = time.time()
-
-    # Load PyTorch model
-    device = select_device(opt.device)
-    model = attempt_load(opt.weights, map_location=device)  # load FP32 model
-    labels = model.names
-
-    # Checks
-    gs = int(max(model.stride))  # grid size (max stride)
-    opt.img_size = [check_img_size(x, gs) for x in opt.img_size]  # verify img_size are gs-multiples
-
-    # Input
-    img = torch.zeros(opt.batch_size, 3, *opt.img_size).to(device)  # image size(1,3,320,192) iDetection
-
-    # Update model
-    for k, m in model.named_modules():
-        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
-        if isinstance(m, models.common.Conv):  # assign export-friendly activations
-            if isinstance(m.act, nn.Hardswish):
-                m.act = Hardswish()
-            elif isinstance(m.act, nn.SiLU):
-                m.act = SiLU()
-        # elif isinstance(m, models.yolo.Detect):
-        #     m.forward = m.forward_export  # assign forward (optional)
-    model.model[-1].export = not opt.grid  # set Detect() layer grid export
-    for _ in range(2):
-        y = model(img)  # dry runs
-    print(f"\n{colorstr('PyTorch:')} starting from {opt.weights} ({file_size(opt.weights):.1f} MB)")
-
-    # TorchScript export -----------------------------------------------------------------------------------------------
-    prefix = colorstr('TorchScript:')
-    try:
-        print(f'\n{prefix} starting export with torch {torch.__version__}...')
-        f = opt.weights.replace('.pt', '.torchscript.pt')  # filename
-        ts = torch.jit.trace(model, img, strict=False)
-        ts = optimize_for_mobile(ts)  # https://pytorch.org/tutorials/recipes/script_optimized.html
-        ts.save(f)
-        print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
-    except Exception as e:
-        print(f'{prefix} export failure: {e}')
-
-    # ONNX export ------------------------------------------------------------------------------------------------------
-    prefix = colorstr('ONNX:')
-    try:
-        import onnx
-
-        print(f'{prefix} starting export with onnx {onnx.__version__}...')
-        f = opt.weights.replace('.pt', '.onnx')  # filename
-        torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
-                          dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'},  # size(1,3,640,640)
-                                        'output': {0: 'batch', 2: 'y', 3: 'x'}} if opt.dynamic else None)
-
-        # Checks
-        model_onnx = onnx.load(f)  # load onnx model
-        onnx.checker.check_model(model_onnx)  # check onnx model
-        # print(onnx.helper.printable_graph(model_onnx.graph))  # print
-
-        # Simplify
-        if opt.simplify:
-            try:
-                check_requirements(['onnx-simplifier'])
-                import onnxsim
-
-                print(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
-                model_onnx, check = onnxsim.simplify(model_onnx,
-                                                     dynamic_input_shape=opt.dynamic,
-                                                     input_shapes={'images': list(img.shape)} if opt.dynamic else None)
-                assert check, 'assert check failed'
-                onnx.save(model_onnx, f)
-            except Exception as e:
-                print(f'{prefix} simplifier failure: {e}')
-        print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
-    except Exception as e:
-        print(f'{prefix} export failure: {e}')
-
-    # CoreML export ----------------------------------------------------------------------------------------------------
-    prefix = colorstr('CoreML:')
-    try:
-        import coremltools as ct
-
-        print(f'{prefix} starting export with coremltools {ct.__version__}...')
-        # convert model from torchscript and apply pixel scaling as per detect.py
-        model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
-        f = opt.weights.replace('.pt', '.mlmodel')  # filename
-        model.save(f)
-        print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
-    except Exception as e:
-        print(f'{prefix} export failure: {e}')
-
-    # Finish
-    print(f'\nExport complete ({time.time() - t:.2f}s). Visualize with https://github.com/lutzroeder/netron.')
diff --git a/models/hub/anchors.yaml b/models/hub/anchors.yaml
index a07a4dc72387..e4d7beb06e07 100644
--- a/models/hub/anchors.yaml
+++ b/models/hub/anchors.yaml
@@ -1,58 +1,59 @@
-# Default YOLOv5 anchors for COCO data
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# Default anchors for COCO data
 
 
 # P5 -------------------------------------------------------------------------------------------------------------------
 # P5-640:
 anchors_p5_640:
-  - [ 10,13, 16,30, 33,23 ]  # P3/8
-  - [ 30,61, 62,45, 59,119 ]  # P4/16
-  - [ 116,90, 156,198, 373,326 ]  # P5/32
+  - [10,13, 16,30, 33,23]  # P3/8
+  - [30,61, 62,45, 59,119]  # P4/16
+  - [116,90, 156,198, 373,326]  # P5/32
 
 
 # P6 -------------------------------------------------------------------------------------------------------------------
 # P6-640:  thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11,  21,19,  17,41,  43,32,  39,70,  86,64,  65,131,  134,130,  120,265,  282,180,  247,354,  512,387
 anchors_p6_640:
-  - [ 9,11,  21,19,  17,41 ]  # P3/8
-  - [ 43,32,  39,70,  86,64 ]  # P4/16
-  - [ 65,131,  134,130,  120,265 ]  # P5/32
-  - [ 282,180,  247,354,  512,387 ]  # P6/64
+  - [9,11,  21,19,  17,41]  # P3/8
+  - [43,32,  39,70,  86,64]  # P4/16
+  - [65,131,  134,130,  120,265]  # P5/32
+  - [282,180,  247,354,  512,387]  # P6/64
 
 # P6-1280:  thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27,  44,40,  38,94,  96,68,  86,152,  180,137,  140,301,  303,264,  238,542,  436,615,  739,380,  925,792
 anchors_p6_1280:
-  - [ 19,27,  44,40,  38,94 ]  # P3/8
-  - [ 96,68,  86,152,  180,137 ]  # P4/16
-  - [ 140,301,  303,264,  238,542 ]  # P5/32
-  - [ 436,615,  739,380,  925,792 ]  # P6/64
+  - [19,27,  44,40,  38,94]  # P3/8
+  - [96,68,  86,152,  180,137]  # P4/16
+  - [140,301,  303,264,  238,542]  # P5/32
+  - [436,615,  739,380,  925,792]  # P6/64
 
 # P6-1920:  thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41,  67,59,  57,141,  144,103,  129,227,  270,205,  209,452,  455,396,  358,812,  653,922,  1109,570,  1387,1187
 anchors_p6_1920:
-  - [ 28,41,  67,59,  57,141 ]  # P3/8
-  - [ 144,103,  129,227,  270,205 ]  # P4/16
-  - [ 209,452,  455,396,  358,812 ]  # P5/32
-  - [ 653,922,  1109,570,  1387,1187 ]  # P6/64
+  - [28,41,  67,59,  57,141]  # P3/8
+  - [144,103,  129,227,  270,205]  # P4/16
+  - [209,452,  455,396,  358,812]  # P5/32
+  - [653,922,  1109,570,  1387,1187]  # P6/64
 
 
 # P7 -------------------------------------------------------------------------------------------------------------------
 # P7-640:  thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11,  13,30,  29,20,  30,46,  61,38,  39,92,  78,80,  146,66,  79,163,  149,150,  321,143,  157,303,  257,402,  359,290,  524,372
 anchors_p7_640:
-  - [ 11,11,  13,30,  29,20 ]  # P3/8
-  - [ 30,46,  61,38,  39,92 ]  # P4/16
-  - [ 78,80,  146,66,  79,163 ]  # P5/32
-  - [ 149,150,  321,143,  157,303 ]  # P6/64
-  - [ 257,402,  359,290,  524,372 ]  # P7/128
+  - [11,11,  13,30,  29,20]  # P3/8
+  - [30,46,  61,38,  39,92]  # P4/16
+  - [78,80,  146,66,  79,163]  # P5/32
+  - [149,150,  321,143,  157,303]  # P6/64
+  - [257,402,  359,290,  524,372]  # P7/128
 
 # P7-1280:  thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22,  54,36,  32,77,  70,83,  138,71,  75,173,  165,159,  148,334,  375,151,  334,317,  251,626,  499,474,  750,326,  534,814,  1079,818
 anchors_p7_1280:
-  - [ 19,22,  54,36,  32,77 ]  # P3/8
-  - [ 70,83,  138,71,  75,173 ]  # P4/16
-  - [ 165,159,  148,334,  375,151 ]  # P5/32
-  - [ 334,317,  251,626,  499,474 ]  # P6/64
-  - [ 750,326,  534,814,  1079,818 ]  # P7/128
+  - [19,22,  54,36,  32,77]  # P3/8
+  - [70,83,  138,71,  75,173]  # P4/16
+  - [165,159,  148,334,  375,151]  # P5/32
+  - [334,317,  251,626,  499,474]  # P6/64
+  - [750,326,  534,814,  1079,818]  # P7/128
 
 # P7-1920:  thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34,  81,55,  47,115,  105,124,  207,107,  113,259,  247,238,  222,500,  563,227,  501,476,  376,939,  749,711,  1126,489,  801,1222,  1618,1227
 anchors_p7_1920:
-  - [ 29,34,  81,55,  47,115 ]  # P3/8
-  - [ 105,124,  207,107,  113,259 ]  # P4/16
-  - [ 247,238,  222,500,  563,227 ]  # P5/32
-  - [ 501,476,  376,939,  749,711 ]  # P6/64
-  - [ 1126,489,  801,1222,  1618,1227 ]  # P7/128
+  - [29,34,  81,55,  47,115]  # P3/8
+  - [105,124,  207,107,  113,259]  # P4/16
+  - [247,238,  222,500,  563,227]  # P5/32
+  - [501,476,  376,939,  749,711]  # P6/64
+  - [1126,489,  801,1222,  1618,1227]  # P7/128
diff --git a/models/hub/yolov3-spp.yaml b/models/hub/yolov3-spp.yaml
index 38dcc449f0d0..c66982158ce8 100644
--- a/models/hub/yolov3-spp.yaml
+++ b/models/hub/yolov3-spp.yaml
@@ -1,9 +1,9 @@
-# parameters
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
 nc: 80  # number of classes
 depth_multiple: 1.0  # model depth multiple
 width_multiple: 1.0  # layer channel multiple
-
-# anchors
 anchors:
   - [10,13, 16,30, 33,23]  # P3/8
   - [30,61, 62,45, 59,119]  # P4/16
diff --git a/models/hub/yolov3-tiny.yaml b/models/hub/yolov3-tiny.yaml
index ff7638cad3be..b28b44315248 100644
--- a/models/hub/yolov3-tiny.yaml
+++ b/models/hub/yolov3-tiny.yaml
@@ -1,9 +1,9 @@
-# parameters
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
 nc: 80  # number of classes
 depth_multiple: 1.0  # model depth multiple
 width_multiple: 1.0  # layer channel multiple
-
-# anchors
 anchors:
   - [10,14, 23,27, 37,58]  # P4/16
   - [81,82, 135,169, 344,319]  # P5/32
diff --git a/models/hub/yolov3.yaml b/models/hub/yolov3.yaml
index f2e761355469..d1ef91290a8d 100644
--- a/models/hub/yolov3.yaml
+++ b/models/hub/yolov3.yaml
@@ -1,9 +1,9 @@
-# parameters
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
 nc: 80  # number of classes
 depth_multiple: 1.0  # model depth multiple
 width_multiple: 1.0  # layer channel multiple
-
-# anchors
 anchors:
   - [10,13, 16,30, 33,23]  # P3/8
   - [30,61, 62,45, 59,119]  # P4/16
@@ -28,7 +28,7 @@ backbone:
 # YOLOv3 head
 head:
   [[-1, 1, Bottleneck, [1024, False]],
-   [-1, 1, Conv, [512, [1, 1]]],
+   [-1, 1, Conv, [512, 1, 1]],
    [-1, 1, Conv, [1024, 3, 1]],
    [-1, 1, Conv, [512, 1, 1]],
    [-1, 1, Conv, [1024, 3, 1]],  # 15 (P5/32-large)
diff --git a/models/hub/yolov5-bifpn.yaml b/models/hub/yolov5-bifpn.yaml
new file mode 100644
index 000000000000..504815f5cfa0
--- /dev/null
+++ b/models/hub/yolov5-bifpn.yaml
@@ -0,0 +1,48 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
+nc: 80  # number of classes
+depth_multiple: 1.0  # model depth multiple
+width_multiple: 1.0  # layer channel multiple
+anchors:
+  - [10,13, 16,30, 33,23]  # P3/8
+  - [30,61, 62,45, 59,119]  # P4/16
+  - [116,90, 156,198, 373,326]  # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, C3, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
+   [-1, 6, C3, [256]],
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
+   [-1, 9, C3, [512]],
+   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
+  ]
+
+# YOLOv5 v6.0 BiFPN head
+head:
+  [[-1, 1, Conv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, C3, [512, False]],  # 13
+
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
+   [-1, 3, C3, [256, False]],  # 17 (P3/8-small)
+
+   [-1, 1, Conv, [256, 3, 2]],
+   [[-1, 14, 6], 1, Concat, [1]],  # cat P4 <--- BiFPN change
+   [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
+
+   [-1, 1, Conv, [512, 3, 2]],
+   [[-1, 10], 1, Concat, [1]],  # cat head P5
+   [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)
+
+   [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
+  ]
diff --git a/models/hub/yolov5-fpn.yaml b/models/hub/yolov5-fpn.yaml
index e772bffecbbc..a23e9c6fbf9f 100644
--- a/models/hub/yolov5-fpn.yaml
+++ b/models/hub/yolov5-fpn.yaml
@@ -1,42 +1,42 @@
-# parameters
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
 nc: 80  # number of classes
 depth_multiple: 1.0  # model depth multiple
 width_multiple: 1.0  # layer channel multiple
-
-# anchors
 anchors:
   - [10,13, 16,30, 33,23]  # P3/8
   - [30,61, 62,45, 59,119]  # P4/16
   - [116,90, 156,198, 373,326]  # P5/32
 
-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
 backbone:
   # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
    [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
-   [-1, 3, Bottleneck, [128]],
+   [-1, 3, C3, [128]],
    [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-   [-1, 9, BottleneckCSP, [256]],
+   [-1, 6, C3, [256]],
    [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
-   [-1, 9, BottleneckCSP, [512]],
+   [-1, 9, C3, [512]],
    [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-   [-1, 1, SPP, [1024, [5, 9, 13]]],
-   [-1, 6, BottleneckCSP, [1024]],  # 9
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
   ]
 
-# YOLOv5 FPN head
+# YOLOv5 v6.0 FPN head
 head:
-  [[-1, 3, BottleneckCSP, [1024, False]],  # 10 (P5/32-large)
+  [[-1, 3, C3, [1024, False]],  # 10 (P5/32-large)
 
    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
    [[-1, 6], 1, Concat, [1]],  # cat backbone P4
    [-1, 1, Conv, [512, 1, 1]],
-   [-1, 3, BottleneckCSP, [512, False]],  # 14 (P4/16-medium)
+   [-1, 3, C3, [512, False]],  # 14 (P4/16-medium)
 
    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
    [[-1, 4], 1, Concat, [1]],  # cat backbone P3
    [-1, 1, Conv, [256, 1, 1]],
-   [-1, 3, BottleneckCSP, [256, False]],  # 18 (P3/8-small)
+   [-1, 3, C3, [256, False]],  # 18 (P3/8-small)
 
    [[18, 14, 10], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
   ]
diff --git a/models/hub/yolov5-p2.yaml b/models/hub/yolov5-p2.yaml
index 0633a90fd065..554117dda59a 100644
--- a/models/hub/yolov5-p2.yaml
+++ b/models/hub/yolov5-p2.yaml
@@ -1,54 +1,54 @@
-# parameters
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
 nc: 80  # number of classes
 depth_multiple: 1.0  # model depth multiple
 width_multiple: 1.0  # layer channel multiple
+anchors: 3  # AutoAnchor evolves 3 anchors per P output layer
 
-# anchors
-anchors: 3
-
-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
 backbone:
   # [from, number, module, args]
-  [ [ -1, 1, Focus, [ 64, 3 ] ],  # 0-P1/2
-    [ -1, 1, Conv, [ 128, 3, 2 ] ],  # 1-P2/4
-    [ -1, 3, C3, [ 128 ] ],
-    [ -1, 1, Conv, [ 256, 3, 2 ] ],  # 3-P3/8
-    [ -1, 9, C3, [ 256 ] ],
-    [ -1, 1, Conv, [ 512, 3, 2 ] ],  # 5-P4/16
-    [ -1, 9, C3, [ 512 ] ],
-    [ -1, 1, Conv, [ 1024, 3, 2 ] ],  # 7-P5/32
-    [ -1, 1, SPP, [ 1024, [ 5, 9, 13 ] ] ],
-    [ -1, 3, C3, [ 1024, False ] ],  # 9
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, C3, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
+   [-1, 6, C3, [256]],
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
+   [-1, 9, C3, [512]],
+   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
   ]
 
-# YOLOv5 head
+# YOLOv5 v6.0 head with (P2, P3, P4, P5) outputs
 head:
-  [ [ -1, 1, Conv, [ 512, 1, 1 ] ],
-    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
-    [ [ -1, 6 ], 1, Concat, [ 1 ] ],  # cat backbone P4
-    [ -1, 3, C3, [ 512, False ] ],  # 13
-
-    [ -1, 1, Conv, [ 256, 1, 1 ] ],
-    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
-    [ [ -1, 4 ], 1, Concat, [ 1 ] ],  # cat backbone P3
-    [ -1, 3, C3, [ 256, False ] ],  # 17 (P3/8-small)
-
-    [ -1, 1, Conv, [ 128, 1, 1 ] ],
-    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
-    [ [ -1, 2 ], 1, Concat, [ 1 ] ],  # cat backbone P2
-    [ -1, 1, C3, [ 128, False ] ],  # 21 (P2/4-xsmall)
-
-    [ -1, 1, Conv, [ 128, 3, 2 ] ],
-    [ [ -1, 18 ], 1, Concat, [ 1 ] ],  # cat head P3
-    [ -1, 3, C3, [ 256, False ] ],  # 24 (P3/8-small)
-
-    [ -1, 1, Conv, [ 256, 3, 2 ] ],
-    [ [ -1, 14 ], 1, Concat, [ 1 ] ],  # cat head P4
-    [ -1, 3, C3, [ 512, False ] ],  # 27 (P4/16-medium)
-
-    [ -1, 1, Conv, [ 512, 3, 2 ] ],
-    [ [ -1, 10 ], 1, Concat, [ 1 ] ],  # cat head P5
-    [ -1, 3, C3, [ 1024, False ] ],  # 30 (P5/32-large)
-
-    [ [ 24, 27, 30 ], 1, Detect, [ nc, anchors ] ],  # Detect(P3, P4, P5)
+  [[-1, 1, Conv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, C3, [512, False]],  # 13
+
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
+   [-1, 3, C3, [256, False]],  # 17 (P3/8-small)
+
+   [-1, 1, Conv, [128, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 2], 1, Concat, [1]],  # cat backbone P2
+   [-1, 1, C3, [128, False]],  # 21 (P2/4-xsmall)
+
+   [-1, 1, Conv, [128, 3, 2]],
+   [[-1, 18], 1, Concat, [1]],  # cat head P3
+   [-1, 3, C3, [256, False]],  # 24 (P3/8-small)
+
+   [-1, 1, Conv, [256, 3, 2]],
+   [[-1, 14], 1, Concat, [1]],  # cat head P4
+   [-1, 3, C3, [512, False]],  # 27 (P4/16-medium)
+
+   [-1, 1, Conv, [512, 3, 2]],
+   [[-1, 10], 1, Concat, [1]],  # cat head P5
+   [-1, 3, C3, [1024, False]],  # 30 (P5/32-large)
+
+   [[21, 24, 27, 30], 1, Detect, [nc, anchors]],  # Detect(P2, P3, P4, P5)
   ]
diff --git a/models/hub/yolov5-p34.yaml b/models/hub/yolov5-p34.yaml
new file mode 100644
index 000000000000..dbf0f850083e
--- /dev/null
+++ b/models/hub/yolov5-p34.yaml
@@ -0,0 +1,41 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
+nc: 80  # number of classes
+depth_multiple: 0.33  # model depth multiple
+width_multiple: 0.50  # layer channel multiple
+anchors: 3  # AutoAnchor evolves 3 anchors per P output layer
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [ [ -1, 1, Conv, [ 64, 6, 2, 2 ] ],  # 0-P1/2
+    [ -1, 1, Conv, [ 128, 3, 2 ] ],  # 1-P2/4
+    [ -1, 3, C3, [ 128 ] ],
+    [ -1, 1, Conv, [ 256, 3, 2 ] ],  # 3-P3/8
+    [ -1, 6, C3, [ 256 ] ],
+    [ -1, 1, Conv, [ 512, 3, 2 ] ],  # 5-P4/16
+    [ -1, 9, C3, [ 512 ] ],
+    [ -1, 1, Conv, [ 1024, 3, 2 ] ],  # 7-P5/32
+    [ -1, 3, C3, [ 1024 ] ],
+    [ -1, 1, SPPF, [ 1024, 5 ] ],  # 9
+  ]
+
+# YOLOv5 v6.0 head with (P3, P4) outputs
+head:
+  [ [ -1, 1, Conv, [ 512, 1, 1 ] ],
+    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
+    [ [ -1, 6 ], 1, Concat, [ 1 ] ],  # cat backbone P4
+    [ -1, 3, C3, [ 512, False ] ],  # 13
+
+    [ -1, 1, Conv, [ 256, 1, 1 ] ],
+    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
+    [ [ -1, 4 ], 1, Concat, [ 1 ] ],  # cat backbone P3
+    [ -1, 3, C3, [ 256, False ] ],  # 17 (P3/8-small)
+
+    [ -1, 1, Conv, [ 256, 3, 2 ] ],
+    [ [ -1, 14 ], 1, Concat, [ 1 ] ],  # cat head P4
+    [ -1, 3, C3, [ 512, False ] ],  # 20 (P4/16-medium)
+
+    [ [ 17, 20 ], 1, Detect, [ nc, anchors ] ],  # Detect(P3, P4)
+  ]
diff --git a/models/hub/yolov5-p6.yaml b/models/hub/yolov5-p6.yaml
index 3728a118f090..a17202f22044 100644
--- a/models/hub/yolov5-p6.yaml
+++ b/models/hub/yolov5-p6.yaml
@@ -1,56 +1,56 @@
-# parameters
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
 nc: 80  # number of classes
 depth_multiple: 1.0  # model depth multiple
 width_multiple: 1.0  # layer channel multiple
+anchors: 3  # AutoAnchor evolves 3 anchors per P output layer
 
-# anchors
-anchors: 3
-
-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
 backbone:
   # [from, number, module, args]
-  [ [ -1, 1, Focus, [ 64, 3 ] ],  # 0-P1/2
-    [ -1, 1, Conv, [ 128, 3, 2 ] ],  # 1-P2/4
-    [ -1, 3, C3, [ 128 ] ],
-    [ -1, 1, Conv, [ 256, 3, 2 ] ],  # 3-P3/8
-    [ -1, 9, C3, [ 256 ] ],
-    [ -1, 1, Conv, [ 512, 3, 2 ] ],  # 5-P4/16
-    [ -1, 9, C3, [ 512 ] ],
-    [ -1, 1, Conv, [ 768, 3, 2 ] ],  # 7-P5/32
-    [ -1, 3, C3, [ 768 ] ],
-    [ -1, 1, Conv, [ 1024, 3, 2 ] ],  # 9-P6/64
-    [ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
-    [ -1, 3, C3, [ 1024, False ] ],  # 11
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, C3, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
+   [-1, 6, C3, [256]],
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
+   [-1, 9, C3, [512]],
+   [-1, 1, Conv, [768, 3, 2]],  # 7-P5/32
+   [-1, 3, C3, [768]],
+   [-1, 1, Conv, [1024, 3, 2]],  # 9-P6/64
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 11
   ]
 
-# YOLOv5 head
+# YOLOv5 v6.0 head with (P3, P4, P5, P6) outputs
 head:
-  [ [ -1, 1, Conv, [ 768, 1, 1 ] ],
-    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
-    [ [ -1, 8 ], 1, Concat, [ 1 ] ],  # cat backbone P5
-    [ -1, 3, C3, [ 768, False ] ],  # 15
-
-    [ -1, 1, Conv, [ 512, 1, 1 ] ],
-    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
-    [ [ -1, 6 ], 1, Concat, [ 1 ] ],  # cat backbone P4
-    [ -1, 3, C3, [ 512, False ] ],  # 19
-
-    [ -1, 1, Conv, [ 256, 1, 1 ] ],
-    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
-    [ [ -1, 4 ], 1, Concat, [ 1 ] ],  # cat backbone P3
-    [ -1, 3, C3, [ 256, False ] ],  # 23 (P3/8-small)
-
-    [ -1, 1, Conv, [ 256, 3, 2 ] ],
-    [ [ -1, 20 ], 1, Concat, [ 1 ] ],  # cat head P4
-    [ -1, 3, C3, [ 512, False ] ],  # 26 (P4/16-medium)
-
-    [ -1, 1, Conv, [ 512, 3, 2 ] ],
-    [ [ -1, 16 ], 1, Concat, [ 1 ] ],  # cat head P5
-    [ -1, 3, C3, [ 768, False ] ],  # 29 (P5/32-large)
-
-    [ -1, 1, Conv, [ 768, 3, 2 ] ],
-    [ [ -1, 12 ], 1, Concat, [ 1 ] ],  # cat head P6
-    [ -1, 3, C3, [ 1024, False ] ],  # 32 (P5/64-xlarge)
-
-    [ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ],  # Detect(P3, P4, P5, P6)
+  [[-1, 1, Conv, [768, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 8], 1, Concat, [1]],  # cat backbone P5
+   [-1, 3, C3, [768, False]],  # 15
+
+   [-1, 1, Conv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, C3, [512, False]],  # 19
+
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
+   [-1, 3, C3, [256, False]],  # 23 (P3/8-small)
+
+   [-1, 1, Conv, [256, 3, 2]],
+   [[-1, 20], 1, Concat, [1]],  # cat head P4
+   [-1, 3, C3, [512, False]],  # 26 (P4/16-medium)
+
+   [-1, 1, Conv, [512, 3, 2]],
+   [[-1, 16], 1, Concat, [1]],  # cat head P5
+   [-1, 3, C3, [768, False]],  # 29 (P5/32-large)
+
+   [-1, 1, Conv, [768, 3, 2]],
+   [[-1, 12], 1, Concat, [1]],  # cat head P6
+   [-1, 3, C3, [1024, False]],  # 32 (P6/64-xlarge)
+
+   [[23, 26, 29, 32], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5, P6)
   ]
diff --git a/models/hub/yolov5-p7.yaml b/models/hub/yolov5-p7.yaml
index ca8f8492ce0e..edd7d13a34a6 100644
--- a/models/hub/yolov5-p7.yaml
+++ b/models/hub/yolov5-p7.yaml
@@ -1,67 +1,67 @@
-# parameters
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
 nc: 80  # number of classes
 depth_multiple: 1.0  # model depth multiple
 width_multiple: 1.0  # layer channel multiple
+anchors: 3  # AutoAnchor evolves 3 anchors per P output layer
 
-# anchors
-anchors: 3
-
-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
 backbone:
   # [from, number, module, args]
-  [ [ -1, 1, Focus, [ 64, 3 ] ],  # 0-P1/2
-    [ -1, 1, Conv, [ 128, 3, 2 ] ],  # 1-P2/4
-    [ -1, 3, C3, [ 128 ] ],
-    [ -1, 1, Conv, [ 256, 3, 2 ] ],  # 3-P3/8
-    [ -1, 9, C3, [ 256 ] ],
-    [ -1, 1, Conv, [ 512, 3, 2 ] ],  # 5-P4/16
-    [ -1, 9, C3, [ 512 ] ],
-    [ -1, 1, Conv, [ 768, 3, 2 ] ],  # 7-P5/32
-    [ -1, 3, C3, [ 768 ] ],
-    [ -1, 1, Conv, [ 1024, 3, 2 ] ],  # 9-P6/64
-    [ -1, 3, C3, [ 1024 ] ],
-    [ -1, 1, Conv, [ 1280, 3, 2 ] ],  # 11-P7/128
-    [ -1, 1, SPP, [ 1280, [ 3, 5 ] ] ],
-    [ -1, 3, C3, [ 1280, False ] ],  # 13
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, C3, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
+   [-1, 6, C3, [256]],
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
+   [-1, 9, C3, [512]],
+   [-1, 1, Conv, [768, 3, 2]],  # 7-P5/32
+   [-1, 3, C3, [768]],
+   [-1, 1, Conv, [1024, 3, 2]],  # 9-P6/64
+   [-1, 3, C3, [1024]],
+   [-1, 1, Conv, [1280, 3, 2]],  # 11-P7/128
+   [-1, 3, C3, [1280]],
+   [-1, 1, SPPF, [1280, 5]],  # 13
   ]
 
-# YOLOv5 head
+# YOLOv5 v6.0 head with (P3, P4, P5, P6, P7) outputs
 head:
-  [ [ -1, 1, Conv, [ 1024, 1, 1 ] ],
-    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
-    [ [ -1, 10 ], 1, Concat, [ 1 ] ],  # cat backbone P6
-    [ -1, 3, C3, [ 1024, False ] ],  # 17
+  [[-1, 1, Conv, [1024, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 10], 1, Concat, [1]],  # cat backbone P6
+   [-1, 3, C3, [1024, False]],  # 17
 
-    [ -1, 1, Conv, [ 768, 1, 1 ] ],
-    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
-    [ [ -1, 8 ], 1, Concat, [ 1 ] ],  # cat backbone P5
-    [ -1, 3, C3, [ 768, False ] ],  # 21
+   [-1, 1, Conv, [768, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 8], 1, Concat, [1]],  # cat backbone P5
+   [-1, 3, C3, [768, False]],  # 21
 
-    [ -1, 1, Conv, [ 512, 1, 1 ] ],
-    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
-    [ [ -1, 6 ], 1, Concat, [ 1 ] ],  # cat backbone P4
-    [ -1, 3, C3, [ 512, False ] ],  # 25
+   [-1, 1, Conv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, C3, [512, False]],  # 25
 
-    [ -1, 1, Conv, [ 256, 1, 1 ] ],
-    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
-    [ [ -1, 4 ], 1, Concat, [ 1 ] ],  # cat backbone P3
-    [ -1, 3, C3, [ 256, False ] ],  # 29 (P3/8-small)
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
+   [-1, 3, C3, [256, False]],  # 29 (P3/8-small)
 
-    [ -1, 1, Conv, [ 256, 3, 2 ] ],
-    [ [ -1, 26 ], 1, Concat, [ 1 ] ],  # cat head P4
-    [ -1, 3, C3, [ 512, False ] ],  # 32 (P4/16-medium)
+   [-1, 1, Conv, [256, 3, 2]],
+   [[-1, 26], 1, Concat, [1]],  # cat head P4
+   [-1, 3, C3, [512, False]],  # 32 (P4/16-medium)
 
-    [ -1, 1, Conv, [ 512, 3, 2 ] ],
-    [ [ -1, 22 ], 1, Concat, [ 1 ] ],  # cat head P5
-    [ -1, 3, C3, [ 768, False ] ],  # 35 (P5/32-large)
+   [-1, 1, Conv, [512, 3, 2]],
+   [[-1, 22], 1, Concat, [1]],  # cat head P5
+   [-1, 3, C3, [768, False]],  # 35 (P5/32-large)
 
-    [ -1, 1, Conv, [ 768, 3, 2 ] ],
-    [ [ -1, 18 ], 1, Concat, [ 1 ] ],  # cat head P6
-    [ -1, 3, C3, [ 1024, False ] ],  # 38 (P6/64-xlarge)
+   [-1, 1, Conv, [768, 3, 2]],
+   [[-1, 18], 1, Concat, [1]],  # cat head P6
+   [-1, 3, C3, [1024, False]],  # 38 (P6/64-xlarge)
 
-    [ -1, 1, Conv, [ 1024, 3, 2 ] ],
-    [ [ -1, 14 ], 1, Concat, [ 1 ] ],  # cat head P7
-    [ -1, 3, C3, [ 1280, False ] ],  # 41 (P7/128-xxlarge)
+   [-1, 1, Conv, [1024, 3, 2]],
+   [[-1, 14], 1, Concat, [1]],  # cat head P7
+   [-1, 3, C3, [1280, False]],  # 41 (P7/128-xxlarge)
 
-    [ [ 29, 32, 35, 38, 41 ], 1, Detect, [ nc, anchors ] ],  # Detect(P3, P4, P5, P6, P7)
+   [[29, 32, 35, 38, 41], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5, P6, P7)
   ]
diff --git a/models/hub/yolov5-panet.yaml b/models/hub/yolov5-panet.yaml
index 340f95a4dbc9..ccfbf900691c 100644
--- a/models/hub/yolov5-panet.yaml
+++ b/models/hub/yolov5-panet.yaml
@@ -1,48 +1,48 @@
-# parameters
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
 nc: 80  # number of classes
 depth_multiple: 1.0  # model depth multiple
 width_multiple: 1.0  # layer channel multiple
-
-# anchors
 anchors:
   - [10,13, 16,30, 33,23]  # P3/8
   - [30,61, 62,45, 59,119]  # P4/16
   - [116,90, 156,198, 373,326]  # P5/32
 
-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
 backbone:
   # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
    [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
-   [-1, 3, BottleneckCSP, [128]],
+   [-1, 3, C3, [128]],
    [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-   [-1, 9, BottleneckCSP, [256]],
+   [-1, 6, C3, [256]],
    [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
-   [-1, 9, BottleneckCSP, [512]],
+   [-1, 9, C3, [512]],
    [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-   [-1, 1, SPP, [1024, [5, 9, 13]]],
-   [-1, 3, BottleneckCSP, [1024, False]],  # 9
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
   ]
 
-# YOLOv5 PANet head
+# YOLOv5 v6.0 PANet head
 head:
   [[-1, 1, Conv, [512, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
    [[-1, 6], 1, Concat, [1]],  # cat backbone P4
-   [-1, 3, BottleneckCSP, [512, False]],  # 13
+   [-1, 3, C3, [512, False]],  # 13
 
    [-1, 1, Conv, [256, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
    [[-1, 4], 1, Concat, [1]],  # cat backbone P3
-   [-1, 3, BottleneckCSP, [256, False]],  # 17 (P3/8-small)
+   [-1, 3, C3, [256, False]],  # 17 (P3/8-small)
 
    [-1, 1, Conv, [256, 3, 2]],
    [[-1, 14], 1, Concat, [1]],  # cat head P4
-   [-1, 3, BottleneckCSP, [512, False]],  # 20 (P4/16-medium)
+   [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
 
    [-1, 1, Conv, [512, 3, 2]],
    [[-1, 10], 1, Concat, [1]],  # cat head P5
-   [-1, 3, BottleneckCSP, [1024, False]],  # 23 (P5/32-large)
+   [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)
 
    [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
   ]
diff --git a/models/hub/yolov5l6.yaml b/models/hub/yolov5l6.yaml
index 11298b01f479..632c2cb699e3 100644
--- a/models/hub/yolov5l6.yaml
+++ b/models/hub/yolov5l6.yaml
@@ -1,60 +1,60 @@
-# parameters
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
 nc: 80  # number of classes
 depth_multiple: 1.0  # model depth multiple
 width_multiple: 1.0  # layer channel multiple
-
-# anchors
 anchors:
-  - [ 19,27,  44,40,  38,94 ]  # P3/8
-  - [ 96,68,  86,152,  180,137 ]  # P4/16
-  - [ 140,301,  303,264,  238,542 ]  # P5/32
-  - [ 436,615,  739,380,  925,792 ]  # P6/64
+  - [19,27,  44,40,  38,94]  # P3/8
+  - [96,68,  86,152,  180,137]  # P4/16
+  - [140,301,  303,264,  238,542]  # P5/32
+  - [436,615,  739,380,  925,792]  # P6/64
 
-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
 backbone:
   # [from, number, module, args]
-  [ [ -1, 1, Focus, [ 64, 3 ] ],  # 0-P1/2
-    [ -1, 1, Conv, [ 128, 3, 2 ] ],  # 1-P2/4
-    [ -1, 3, C3, [ 128 ] ],
-    [ -1, 1, Conv, [ 256, 3, 2 ] ],  # 3-P3/8
-    [ -1, 9, C3, [ 256 ] ],
-    [ -1, 1, Conv, [ 512, 3, 2 ] ],  # 5-P4/16
-    [ -1, 9, C3, [ 512 ] ],
-    [ -1, 1, Conv, [ 768, 3, 2 ] ],  # 7-P5/32
-    [ -1, 3, C3, [ 768 ] ],
-    [ -1, 1, Conv, [ 1024, 3, 2 ] ],  # 9-P6/64
-    [ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
-    [ -1, 3, C3, [ 1024, False ] ],  # 11
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, C3, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
+   [-1, 6, C3, [256]],
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
+   [-1, 9, C3, [512]],
+   [-1, 1, Conv, [768, 3, 2]],  # 7-P5/32
+   [-1, 3, C3, [768]],
+   [-1, 1, Conv, [1024, 3, 2]],  # 9-P6/64
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 11
   ]
 
-# YOLOv5 head
+# YOLOv5 v6.0 head
 head:
-  [ [ -1, 1, Conv, [ 768, 1, 1 ] ],
-    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
-    [ [ -1, 8 ], 1, Concat, [ 1 ] ],  # cat backbone P5
-    [ -1, 3, C3, [ 768, False ] ],  # 15
-
-    [ -1, 1, Conv, [ 512, 1, 1 ] ],
-    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
-    [ [ -1, 6 ], 1, Concat, [ 1 ] ],  # cat backbone P4
-    [ -1, 3, C3, [ 512, False ] ],  # 19
-
-    [ -1, 1, Conv, [ 256, 1, 1 ] ],
-    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
-    [ [ -1, 4 ], 1, Concat, [ 1 ] ],  # cat backbone P3
-    [ -1, 3, C3, [ 256, False ] ],  # 23 (P3/8-small)
-
-    [ -1, 1, Conv, [ 256, 3, 2 ] ],
-    [ [ -1, 20 ], 1, Concat, [ 1 ] ],  # cat head P4
-    [ -1, 3, C3, [ 512, False ] ],  # 26 (P4/16-medium)
-
-    [ -1, 1, Conv, [ 512, 3, 2 ] ],
-    [ [ -1, 16 ], 1, Concat, [ 1 ] ],  # cat head P5
-    [ -1, 3, C3, [ 768, False ] ],  # 29 (P5/32-large)
-
-    [ -1, 1, Conv, [ 768, 3, 2 ] ],
-    [ [ -1, 12 ], 1, Concat, [ 1 ] ],  # cat head P6
-    [ -1, 3, C3, [ 1024, False ] ],  # 32 (P6/64-xlarge)
-
-    [ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ],  # Detect(P3, P4, P5, P6)
+  [[-1, 1, Conv, [768, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 8], 1, Concat, [1]],  # cat backbone P5
+   [-1, 3, C3, [768, False]],  # 15
+
+   [-1, 1, Conv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, C3, [512, False]],  # 19
+
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
+   [-1, 3, C3, [256, False]],  # 23 (P3/8-small)
+
+   [-1, 1, Conv, [256, 3, 2]],
+   [[-1, 20], 1, Concat, [1]],  # cat head P4
+   [-1, 3, C3, [512, False]],  # 26 (P4/16-medium)
+
+   [-1, 1, Conv, [512, 3, 2]],
+   [[-1, 16], 1, Concat, [1]],  # cat head P5
+   [-1, 3, C3, [768, False]],  # 29 (P5/32-large)
+
+   [-1, 1, Conv, [768, 3, 2]],
+   [[-1, 12], 1, Concat, [1]],  # cat head P6
+   [-1, 3, C3, [1024, False]],  # 32 (P6/64-xlarge)
+
+   [[23, 26, 29, 32], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5, P6)
   ]
diff --git a/models/hub/yolov5m6.yaml b/models/hub/yolov5m6.yaml
index 48afc865593a..ecc53fd68ba6 100644
--- a/models/hub/yolov5m6.yaml
+++ b/models/hub/yolov5m6.yaml
@@ -1,60 +1,60 @@
-# parameters
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
 nc: 80  # number of classes
 depth_multiple: 0.67  # model depth multiple
 width_multiple: 0.75  # layer channel multiple
-
-# anchors
 anchors:
-  - [ 19,27,  44,40,  38,94 ]  # P3/8
-  - [ 96,68,  86,152,  180,137 ]  # P4/16
-  - [ 140,301,  303,264,  238,542 ]  # P5/32
-  - [ 436,615,  739,380,  925,792 ]  # P6/64
+  - [19,27,  44,40,  38,94]  # P3/8
+  - [96,68,  86,152,  180,137]  # P4/16
+  - [140,301,  303,264,  238,542]  # P5/32
+  - [436,615,  739,380,  925,792]  # P6/64
 
-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
 backbone:
   # [from, number, module, args]
-  [ [ -1, 1, Focus, [ 64, 3 ] ],  # 0-P1/2
-    [ -1, 1, Conv, [ 128, 3, 2 ] ],  # 1-P2/4
-    [ -1, 3, C3, [ 128 ] ],
-    [ -1, 1, Conv, [ 256, 3, 2 ] ],  # 3-P3/8
-    [ -1, 9, C3, [ 256 ] ],
-    [ -1, 1, Conv, [ 512, 3, 2 ] ],  # 5-P4/16
-    [ -1, 9, C3, [ 512 ] ],
-    [ -1, 1, Conv, [ 768, 3, 2 ] ],  # 7-P5/32
-    [ -1, 3, C3, [ 768 ] ],
-    [ -1, 1, Conv, [ 1024, 3, 2 ] ],  # 9-P6/64
-    [ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
-    [ -1, 3, C3, [ 1024, False ] ],  # 11
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, C3, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
+   [-1, 6, C3, [256]],
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
+   [-1, 9, C3, [512]],
+   [-1, 1, Conv, [768, 3, 2]],  # 7-P5/32
+   [-1, 3, C3, [768]],
+   [-1, 1, Conv, [1024, 3, 2]],  # 9-P6/64
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 11
   ]
 
-# YOLOv5 head
+# YOLOv5 v6.0 head
 head:
-  [ [ -1, 1, Conv, [ 768, 1, 1 ] ],
-    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
-    [ [ -1, 8 ], 1, Concat, [ 1 ] ],  # cat backbone P5
-    [ -1, 3, C3, [ 768, False ] ],  # 15
-
-    [ -1, 1, Conv, [ 512, 1, 1 ] ],
-    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
-    [ [ -1, 6 ], 1, Concat, [ 1 ] ],  # cat backbone P4
-    [ -1, 3, C3, [ 512, False ] ],  # 19
-
-    [ -1, 1, Conv, [ 256, 1, 1 ] ],
-    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
-    [ [ -1, 4 ], 1, Concat, [ 1 ] ],  # cat backbone P3
-    [ -1, 3, C3, [ 256, False ] ],  # 23 (P3/8-small)
-
-    [ -1, 1, Conv, [ 256, 3, 2 ] ],
-    [ [ -1, 20 ], 1, Concat, [ 1 ] ],  # cat head P4
-    [ -1, 3, C3, [ 512, False ] ],  # 26 (P4/16-medium)
-
-    [ -1, 1, Conv, [ 512, 3, 2 ] ],
-    [ [ -1, 16 ], 1, Concat, [ 1 ] ],  # cat head P5
-    [ -1, 3, C3, [ 768, False ] ],  # 29 (P5/32-large)
-
-    [ -1, 1, Conv, [ 768, 3, 2 ] ],
-    [ [ -1, 12 ], 1, Concat, [ 1 ] ],  # cat head P6
-    [ -1, 3, C3, [ 1024, False ] ],  # 32 (P6/64-xlarge)
-
-    [ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ],  # Detect(P3, P4, P5, P6)
+  [[-1, 1, Conv, [768, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 8], 1, Concat, [1]],  # cat backbone P5
+   [-1, 3, C3, [768, False]],  # 15
+
+   [-1, 1, Conv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, C3, [512, False]],  # 19
+
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
+   [-1, 3, C3, [256, False]],  # 23 (P3/8-small)
+
+   [-1, 1, Conv, [256, 3, 2]],
+   [[-1, 20], 1, Concat, [1]],  # cat head P4
+   [-1, 3, C3, [512, False]],  # 26 (P4/16-medium)
+
+   [-1, 1, Conv, [512, 3, 2]],
+   [[-1, 16], 1, Concat, [1]],  # cat head P5
+   [-1, 3, C3, [768, False]],  # 29 (P5/32-large)
+
+   [-1, 1, Conv, [768, 3, 2]],
+   [[-1, 12], 1, Concat, [1]],  # cat head P6
+   [-1, 3, C3, [1024, False]],  # 32 (P6/64-xlarge)
+
+   [[23, 26, 29, 32], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5, P6)
   ]
diff --git a/models/hub/yolov5n6.yaml b/models/hub/yolov5n6.yaml
new file mode 100644
index 000000000000..0c0c71d32551
--- /dev/null
+++ b/models/hub/yolov5n6.yaml
@@ -0,0 +1,60 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
+nc: 80  # number of classes
+depth_multiple: 0.33  # model depth multiple
+width_multiple: 0.25  # layer channel multiple
+anchors:
+  - [19,27,  44,40,  38,94]  # P3/8
+  - [96,68,  86,152,  180,137]  # P4/16
+  - [140,301,  303,264,  238,542]  # P5/32
+  - [436,615,  739,380,  925,792]  # P6/64
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, C3, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
+   [-1, 6, C3, [256]],
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
+   [-1, 9, C3, [512]],
+   [-1, 1, Conv, [768, 3, 2]],  # 7-P5/32
+   [-1, 3, C3, [768]],
+   [-1, 1, Conv, [1024, 3, 2]],  # 9-P6/64
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 11
+  ]
+
+# YOLOv5 v6.0 head
+head:
+  [[-1, 1, Conv, [768, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 8], 1, Concat, [1]],  # cat backbone P5
+   [-1, 3, C3, [768, False]],  # 15
+
+   [-1, 1, Conv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, C3, [512, False]],  # 19
+
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
+   [-1, 3, C3, [256, False]],  # 23 (P3/8-small)
+
+   [-1, 1, Conv, [256, 3, 2]],
+   [[-1, 20], 1, Concat, [1]],  # cat head P4
+   [-1, 3, C3, [512, False]],  # 26 (P4/16-medium)
+
+   [-1, 1, Conv, [512, 3, 2]],
+   [[-1, 16], 1, Concat, [1]],  # cat head P5
+   [-1, 3, C3, [768, False]],  # 29 (P5/32-large)
+
+   [-1, 1, Conv, [768, 3, 2]],
+   [[-1, 12], 1, Concat, [1]],  # cat head P6
+   [-1, 3, C3, [1024, False]],  # 32 (P6/64-xlarge)
+
+   [[23, 26, 29, 32], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5, P6)
+  ]
diff --git a/models/hub/yolov5s-LeakyReLU.yaml b/models/hub/yolov5s-LeakyReLU.yaml
new file mode 100644
index 000000000000..3a179bf3311c
--- /dev/null
+++ b/models/hub/yolov5s-LeakyReLU.yaml
@@ -0,0 +1,49 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
+nc: 80  # number of classes
+activation: nn.LeakyReLU(0.1)  # <----- Conv() activation used throughout entire YOLOv5 model
+depth_multiple: 0.33  # model depth multiple
+width_multiple: 0.50  # layer channel multiple
+anchors:
+  - [10,13, 16,30, 33,23]  # P3/8
+  - [30,61, 62,45, 59,119]  # P4/16
+  - [116,90, 156,198, 373,326]  # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, C3, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
+   [-1, 6, C3, [256]],
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
+   [-1, 9, C3, [512]],
+   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
+  ]
+
+# YOLOv5 v6.0 head
+head:
+  [[-1, 1, Conv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, C3, [512, False]],  # 13
+
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
+   [-1, 3, C3, [256, False]],  # 17 (P3/8-small)
+
+   [-1, 1, Conv, [256, 3, 2]],
+   [[-1, 14], 1, Concat, [1]],  # cat head P4
+   [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
+
+   [-1, 1, Conv, [512, 3, 2]],
+   [[-1, 10], 1, Concat, [1]],  # cat head P5
+   [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)
+
+   [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
+  ]
diff --git a/models/hub/yolov5s-ghost.yaml b/models/hub/yolov5s-ghost.yaml
new file mode 100644
index 000000000000..ff9519c3f1aa
--- /dev/null
+++ b/models/hub/yolov5s-ghost.yaml
@@ -0,0 +1,48 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
+nc: 80  # number of classes
+depth_multiple: 0.33  # model depth multiple
+width_multiple: 0.50  # layer channel multiple
+anchors:
+  - [10,13, 16,30, 33,23]  # P3/8
+  - [30,61, 62,45, 59,119]  # P4/16
+  - [116,90, 156,198, 373,326]  # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+   [-1, 1, GhostConv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, C3Ghost, [128]],
+   [-1, 1, GhostConv, [256, 3, 2]],  # 3-P3/8
+   [-1, 6, C3Ghost, [256]],
+   [-1, 1, GhostConv, [512, 3, 2]],  # 5-P4/16
+   [-1, 9, C3Ghost, [512]],
+   [-1, 1, GhostConv, [1024, 3, 2]],  # 7-P5/32
+   [-1, 3, C3Ghost, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
+  ]
+
+# YOLOv5 v6.0 head
+head:
+  [[-1, 1, GhostConv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, C3Ghost, [512, False]],  # 13
+
+   [-1, 1, GhostConv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
+   [-1, 3, C3Ghost, [256, False]],  # 17 (P3/8-small)
+
+   [-1, 1, GhostConv, [256, 3, 2]],
+   [[-1, 14], 1, Concat, [1]],  # cat head P4
+   [-1, 3, C3Ghost, [512, False]],  # 20 (P4/16-medium)
+
+   [-1, 1, GhostConv, [512, 3, 2]],
+   [[-1, 10], 1, Concat, [1]],  # cat head P5
+   [-1, 3, C3Ghost, [1024, False]],  # 23 (P5/32-large)
+
+   [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
+  ]
diff --git a/models/hub/yolov5s-transformer.yaml b/models/hub/yolov5s-transformer.yaml
index f2d666722b30..100d7c447527 100644
--- a/models/hub/yolov5s-transformer.yaml
+++ b/models/hub/yolov5s-transformer.yaml
@@ -1,30 +1,30 @@
-# parameters
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
 nc: 80  # number of classes
 depth_multiple: 0.33  # model depth multiple
 width_multiple: 0.50  # layer channel multiple
-
-# anchors
 anchors:
   - [10,13, 16,30, 33,23]  # P3/8
   - [30,61, 62,45, 59,119]  # P4/16
   - [116,90, 156,198, 373,326]  # P5/32
 
-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
 backbone:
   # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
    [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
    [-1, 3, C3, [128]],
    [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-   [-1, 9, C3, [256]],
+   [-1, 6, C3, [256]],
    [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
    [-1, 9, C3, [512]],
    [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-   [-1, 1, SPP, [1024, [5, 9, 13]]],
-   [-1, 3, C3TR, [1024, False]],  # 9  <-------- C3TR() Transformer module
+   [-1, 3, C3TR, [1024]],  # 9 <--- C3TR() Transformer module
+   [-1, 1, SPPF, [1024, 5]],  # 9
   ]
 
-# YOLOv5 head
+# YOLOv5 v6.0 head
 head:
   [[-1, 1, Conv, [512, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
diff --git a/models/hub/yolov5s6.yaml b/models/hub/yolov5s6.yaml
index 1df577a2cc97..a28fb559482b 100644
--- a/models/hub/yolov5s6.yaml
+++ b/models/hub/yolov5s6.yaml
@@ -1,60 +1,60 @@
-# parameters
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
 nc: 80  # number of classes
 depth_multiple: 0.33  # model depth multiple
 width_multiple: 0.50  # layer channel multiple
-
-# anchors
 anchors:
-  - [ 19,27,  44,40,  38,94 ]  # P3/8
-  - [ 96,68,  86,152,  180,137 ]  # P4/16
-  - [ 140,301,  303,264,  238,542 ]  # P5/32
-  - [ 436,615,  739,380,  925,792 ]  # P6/64
+  - [19,27,  44,40,  38,94]  # P3/8
+  - [96,68,  86,152,  180,137]  # P4/16
+  - [140,301,  303,264,  238,542]  # P5/32
+  - [436,615,  739,380,  925,792]  # P6/64
 
-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
 backbone:
   # [from, number, module, args]
-  [ [ -1, 1, Focus, [ 64, 3 ] ],  # 0-P1/2
-    [ -1, 1, Conv, [ 128, 3, 2 ] ],  # 1-P2/4
-    [ -1, 3, C3, [ 128 ] ],
-    [ -1, 1, Conv, [ 256, 3, 2 ] ],  # 3-P3/8
-    [ -1, 9, C3, [ 256 ] ],
-    [ -1, 1, Conv, [ 512, 3, 2 ] ],  # 5-P4/16
-    [ -1, 9, C3, [ 512 ] ],
-    [ -1, 1, Conv, [ 768, 3, 2 ] ],  # 7-P5/32
-    [ -1, 3, C3, [ 768 ] ],
-    [ -1, 1, Conv, [ 1024, 3, 2 ] ],  # 9-P6/64
-    [ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
-    [ -1, 3, C3, [ 1024, False ] ],  # 11
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, C3, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
+   [-1, 6, C3, [256]],
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
+   [-1, 9, C3, [512]],
+   [-1, 1, Conv, [768, 3, 2]],  # 7-P5/32
+   [-1, 3, C3, [768]],
+   [-1, 1, Conv, [1024, 3, 2]],  # 9-P6/64
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 11
   ]
 
-# YOLOv5 head
+# YOLOv5 v6.0 head
 head:
-  [ [ -1, 1, Conv, [ 768, 1, 1 ] ],
-    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
-    [ [ -1, 8 ], 1, Concat, [ 1 ] ],  # cat backbone P5
-    [ -1, 3, C3, [ 768, False ] ],  # 15
-
-    [ -1, 1, Conv, [ 512, 1, 1 ] ],
-    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
-    [ [ -1, 6 ], 1, Concat, [ 1 ] ],  # cat backbone P4
-    [ -1, 3, C3, [ 512, False ] ],  # 19
-
-    [ -1, 1, Conv, [ 256, 1, 1 ] ],
-    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
-    [ [ -1, 4 ], 1, Concat, [ 1 ] ],  # cat backbone P3
-    [ -1, 3, C3, [ 256, False ] ],  # 23 (P3/8-small)
-
-    [ -1, 1, Conv, [ 256, 3, 2 ] ],
-    [ [ -1, 20 ], 1, Concat, [ 1 ] ],  # cat head P4
-    [ -1, 3, C3, [ 512, False ] ],  # 26 (P4/16-medium)
-
-    [ -1, 1, Conv, [ 512, 3, 2 ] ],
-    [ [ -1, 16 ], 1, Concat, [ 1 ] ],  # cat head P5
-    [ -1, 3, C3, [ 768, False ] ],  # 29 (P5/32-large)
-
-    [ -1, 1, Conv, [ 768, 3, 2 ] ],
-    [ [ -1, 12 ], 1, Concat, [ 1 ] ],  # cat head P6
-    [ -1, 3, C3, [ 1024, False ] ],  # 32 (P6/64-xlarge)
-
-    [ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ],  # Detect(P3, P4, P5, P6)
+  [[-1, 1, Conv, [768, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 8], 1, Concat, [1]],  # cat backbone P5
+   [-1, 3, C3, [768, False]],  # 15
+
+   [-1, 1, Conv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, C3, [512, False]],  # 19
+
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
+   [-1, 3, C3, [256, False]],  # 23 (P3/8-small)
+
+   [-1, 1, Conv, [256, 3, 2]],
+   [[-1, 20], 1, Concat, [1]],  # cat head P4
+   [-1, 3, C3, [512, False]],  # 26 (P4/16-medium)
+
+   [-1, 1, Conv, [512, 3, 2]],
+   [[-1, 16], 1, Concat, [1]],  # cat head P5
+   [-1, 3, C3, [768, False]],  # 29 (P5/32-large)
+
+   [-1, 1, Conv, [768, 3, 2]],
+   [[-1, 12], 1, Concat, [1]],  # cat head P6
+   [-1, 3, C3, [1024, False]],  # 32 (P6/64-xlarge)
+
+   [[23, 26, 29, 32], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5, P6)
   ]
diff --git a/models/hub/yolov5x6.yaml b/models/hub/yolov5x6.yaml
index 5ebc02124fe7..ba795c4aad31 100644
--- a/models/hub/yolov5x6.yaml
+++ b/models/hub/yolov5x6.yaml
@@ -1,60 +1,60 @@
-# parameters
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
 nc: 80  # number of classes
 depth_multiple: 1.33  # model depth multiple
 width_multiple: 1.25  # layer channel multiple
-
-# anchors
 anchors:
-  - [ 19,27,  44,40,  38,94 ]  # P3/8
-  - [ 96,68,  86,152,  180,137 ]  # P4/16
-  - [ 140,301,  303,264,  238,542 ]  # P5/32
-  - [ 436,615,  739,380,  925,792 ]  # P6/64
+  - [19,27,  44,40,  38,94]  # P3/8
+  - [96,68,  86,152,  180,137]  # P4/16
+  - [140,301,  303,264,  238,542]  # P5/32
+  - [436,615,  739,380,  925,792]  # P6/64
 
-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
 backbone:
   # [from, number, module, args]
-  [ [ -1, 1, Focus, [ 64, 3 ] ],  # 0-P1/2
-    [ -1, 1, Conv, [ 128, 3, 2 ] ],  # 1-P2/4
-    [ -1, 3, C3, [ 128 ] ],
-    [ -1, 1, Conv, [ 256, 3, 2 ] ],  # 3-P3/8
-    [ -1, 9, C3, [ 256 ] ],
-    [ -1, 1, Conv, [ 512, 3, 2 ] ],  # 5-P4/16
-    [ -1, 9, C3, [ 512 ] ],
-    [ -1, 1, Conv, [ 768, 3, 2 ] ],  # 7-P5/32
-    [ -1, 3, C3, [ 768 ] ],
-    [ -1, 1, Conv, [ 1024, 3, 2 ] ],  # 9-P6/64
-    [ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
-    [ -1, 3, C3, [ 1024, False ] ],  # 11
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, C3, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
+   [-1, 6, C3, [256]],
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
+   [-1, 9, C3, [512]],
+   [-1, 1, Conv, [768, 3, 2]],  # 7-P5/32
+   [-1, 3, C3, [768]],
+   [-1, 1, Conv, [1024, 3, 2]],  # 9-P6/64
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 11
   ]
 
-# YOLOv5 head
+# YOLOv5 v6.0 head
 head:
-  [ [ -1, 1, Conv, [ 768, 1, 1 ] ],
-    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
-    [ [ -1, 8 ], 1, Concat, [ 1 ] ],  # cat backbone P5
-    [ -1, 3, C3, [ 768, False ] ],  # 15
-
-    [ -1, 1, Conv, [ 512, 1, 1 ] ],
-    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
-    [ [ -1, 6 ], 1, Concat, [ 1 ] ],  # cat backbone P4
-    [ -1, 3, C3, [ 512, False ] ],  # 19
-
-    [ -1, 1, Conv, [ 256, 1, 1 ] ],
-    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
-    [ [ -1, 4 ], 1, Concat, [ 1 ] ],  # cat backbone P3
-    [ -1, 3, C3, [ 256, False ] ],  # 23 (P3/8-small)
-
-    [ -1, 1, Conv, [ 256, 3, 2 ] ],
-    [ [ -1, 20 ], 1, Concat, [ 1 ] ],  # cat head P4
-    [ -1, 3, C3, [ 512, False ] ],  # 26 (P4/16-medium)
-
-    [ -1, 1, Conv, [ 512, 3, 2 ] ],
-    [ [ -1, 16 ], 1, Concat, [ 1 ] ],  # cat head P5
-    [ -1, 3, C3, [ 768, False ] ],  # 29 (P5/32-large)
-
-    [ -1, 1, Conv, [ 768, 3, 2 ] ],
-    [ [ -1, 12 ], 1, Concat, [ 1 ] ],  # cat head P6
-    [ -1, 3, C3, [ 1024, False ] ],  # 32 (P6/64-xlarge)
-
-    [ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ],  # Detect(P3, P4, P5, P6)
+  [[-1, 1, Conv, [768, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 8], 1, Concat, [1]],  # cat backbone P5
+   [-1, 3, C3, [768, False]],  # 15
+
+   [-1, 1, Conv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, C3, [512, False]],  # 19
+
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
+   [-1, 3, C3, [256, False]],  # 23 (P3/8-small)
+
+   [-1, 1, Conv, [256, 3, 2]],
+   [[-1, 20], 1, Concat, [1]],  # cat head P4
+   [-1, 3, C3, [512, False]],  # 26 (P4/16-medium)
+
+   [-1, 1, Conv, [512, 3, 2]],
+   [[-1, 16], 1, Concat, [1]],  # cat head P5
+   [-1, 3, C3, [768, False]],  # 29 (P5/32-large)
+
+   [-1, 1, Conv, [768, 3, 2]],
+   [[-1, 12], 1, Concat, [1]],  # cat head P6
+   [-1, 3, C3, [1024, False]],  # 32 (P6/64-xlarge)
+
+   [[23, 26, 29, 32], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5, P6)
   ]
diff --git a/models/segment/yolov5l-seg.yaml b/models/segment/yolov5l-seg.yaml
new file mode 100644
index 000000000000..4782de11dd2d
--- /dev/null
+++ b/models/segment/yolov5l-seg.yaml
@@ -0,0 +1,48 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
+nc: 80  # number of classes
+depth_multiple: 1.0  # model depth multiple
+width_multiple: 1.0  # layer channel multiple
+anchors:
+  - [10,13, 16,30, 33,23]  # P3/8
+  - [30,61, 62,45, 59,119]  # P4/16
+  - [116,90, 156,198, 373,326]  # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, C3, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
+   [-1, 6, C3, [256]],
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
+   [-1, 9, C3, [512]],
+   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
+  ]
+
+# YOLOv5 v6.0 head
+head:
+  [[-1, 1, Conv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, C3, [512, False]],  # 13
+
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
+   [-1, 3, C3, [256, False]],  # 17 (P3/8-small)
+
+   [-1, 1, Conv, [256, 3, 2]],
+   [[-1, 14], 1, Concat, [1]],  # cat head P4
+   [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
+
+   [-1, 1, Conv, [512, 3, 2]],
+   [[-1, 10], 1, Concat, [1]],  # cat head P5
+   [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)
+
+   [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]],  # Detect(P3, P4, P5)
+  ]
diff --git a/models/segment/yolov5m-seg.yaml b/models/segment/yolov5m-seg.yaml
new file mode 100644
index 000000000000..07ec25ba264d
--- /dev/null
+++ b/models/segment/yolov5m-seg.yaml
@@ -0,0 +1,48 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
+nc: 80  # number of classes
+depth_multiple: 0.67  # model depth multiple
+width_multiple: 0.75  # layer channel multiple
+anchors:
+  - [10,13, 16,30, 33,23]  # P3/8
+  - [30,61, 62,45, 59,119]  # P4/16
+  - [116,90, 156,198, 373,326]  # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, C3, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
+   [-1, 6, C3, [256]],
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
+   [-1, 9, C3, [512]],
+   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
+  ]
+
+# YOLOv5 v6.0 head
+head:
+  [[-1, 1, Conv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, C3, [512, False]],  # 13
+
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
+   [-1, 3, C3, [256, False]],  # 17 (P3/8-small)
+
+   [-1, 1, Conv, [256, 3, 2]],
+   [[-1, 14], 1, Concat, [1]],  # cat head P4
+   [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
+
+   [-1, 1, Conv, [512, 3, 2]],
+   [[-1, 10], 1, Concat, [1]],  # cat head P5
+   [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)
+
+   [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]],  # Detect(P3, P4, P5)
+  ]
diff --git a/models/segment/yolov5n-seg.yaml b/models/segment/yolov5n-seg.yaml
new file mode 100644
index 000000000000..c28225ab4a50
--- /dev/null
+++ b/models/segment/yolov5n-seg.yaml
@@ -0,0 +1,48 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
+nc: 80  # number of classes
+depth_multiple: 0.33  # model depth multiple
+width_multiple: 0.25  # layer channel multiple
+anchors:
+  - [10,13, 16,30, 33,23]  # P3/8
+  - [30,61, 62,45, 59,119]  # P4/16
+  - [116,90, 156,198, 373,326]  # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, C3, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
+   [-1, 6, C3, [256]],
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
+   [-1, 9, C3, [512]],
+   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
+  ]
+
+# YOLOv5 v6.0 head
+head:
+  [[-1, 1, Conv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, C3, [512, False]],  # 13
+
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
+   [-1, 3, C3, [256, False]],  # 17 (P3/8-small)
+
+   [-1, 1, Conv, [256, 3, 2]],
+   [[-1, 14], 1, Concat, [1]],  # cat head P4
+   [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
+
+   [-1, 1, Conv, [512, 3, 2]],
+   [[-1, 10], 1, Concat, [1]],  # cat head P5
+   [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)
+
+   [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]],  # Detect(P3, P4, P5)
+  ]
diff --git a/models/segment/yolov5s-seg.yaml b/models/segment/yolov5s-seg.yaml
new file mode 100644
index 000000000000..a827814e1399
--- /dev/null
+++ b/models/segment/yolov5s-seg.yaml
@@ -0,0 +1,48 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
+nc: 80  # number of classes
+depth_multiple: 0.33  # model depth multiple
+width_multiple: 0.5  # layer channel multiple
+anchors:
+  - [10,13, 16,30, 33,23]  # P3/8
+  - [30,61, 62,45, 59,119]  # P4/16
+  - [116,90, 156,198, 373,326]  # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, C3, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
+   [-1, 6, C3, [256]],
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
+   [-1, 9, C3, [512]],
+   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
+  ]
+
+# YOLOv5 v6.0 head
+head:
+  [[-1, 1, Conv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, C3, [512, False]],  # 13
+
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
+   [-1, 3, C3, [256, False]],  # 17 (P3/8-small)
+
+   [-1, 1, Conv, [256, 3, 2]],
+   [[-1, 14], 1, Concat, [1]],  # cat head P4
+   [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
+
+   [-1, 1, Conv, [512, 3, 2]],
+   [[-1, 10], 1, Concat, [1]],  # cat head P5
+   [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)
+
+   [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]],  # Detect(P3, P4, P5)
+  ]
diff --git a/models/segment/yolov5x-seg.yaml b/models/segment/yolov5x-seg.yaml
new file mode 100644
index 000000000000..5d0c4524a99c
--- /dev/null
+++ b/models/segment/yolov5x-seg.yaml
@@ -0,0 +1,48 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
+nc: 80  # number of classes
+depth_multiple: 1.33  # model depth multiple
+width_multiple: 1.25  # layer channel multiple
+anchors:
+  - [10,13, 16,30, 33,23]  # P3/8
+  - [30,61, 62,45, 59,119]  # P4/16
+  - [116,90, 156,198, 373,326]  # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, C3, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
+   [-1, 6, C3, [256]],
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
+   [-1, 9, C3, [512]],
+   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
+  ]
+
+# YOLOv5 v6.0 head
+head:
+  [[-1, 1, Conv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, C3, [512, False]],  # 13
+
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
+   [-1, 3, C3, [256, False]],  # 17 (P3/8-small)
+
+   [-1, 1, Conv, [256, 3, 2]],
+   [[-1, 14], 1, Concat, [1]],  # cat head P4
+   [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
+
+   [-1, 1, Conv, [512, 3, 2]],
+   [[-1, 10], 1, Concat, [1]],  # cat head P5
+   [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)
+
+   [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]],  # Detect(P3, P4, P5)
+  ]
diff --git a/models/tf.py b/models/tf.py
new file mode 100644
index 000000000000..8290cf2e57f5
--- /dev/null
+++ b/models/tf.py
@@ -0,0 +1,608 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+TensorFlow, Keras and TFLite versions of YOLOv5
+Authored by https://github.com/zldrobit in PR https://github.com/ultralytics/yolov5/pull/1127
+
+Usage:
+    $ python models/tf.py --weights yolov5s.pt
+
+Export:
+    $ python export.py --weights yolov5s.pt --include saved_model pb tflite tfjs
+"""
+
+import argparse
+import sys
+from copy import deepcopy
+from pathlib import Path
+
+FILE = Path(__file__).resolve()
+ROOT = FILE.parents[1]  # YOLOv5 root directory
+if str(ROOT) not in sys.path:
+    sys.path.append(str(ROOT))  # add ROOT to PATH
+# ROOT = ROOT.relative_to(Path.cwd())  # relative
+
+import numpy as np
+import tensorflow as tf
+import torch
+import torch.nn as nn
+from tensorflow import keras
+
+from models.common import (C3, SPP, SPPF, Bottleneck, BottleneckCSP, C3x, Concat, Conv, CrossConv, DWConv,
+                           DWConvTranspose2d, Focus, autopad)
+from models.experimental import MixConv2d, attempt_load
+from models.yolo import Detect, Segment
+from utils.activations import SiLU
+from utils.general import LOGGER, make_divisible, print_args
+
+
+class TFBN(keras.layers.Layer):
+    # TensorFlow BatchNormalization wrapper
+    def __init__(self, w=None):
+        super().__init__()
+        self.bn = keras.layers.BatchNormalization(
+            beta_initializer=keras.initializers.Constant(w.bias.numpy()),
+            gamma_initializer=keras.initializers.Constant(w.weight.numpy()),
+            moving_mean_initializer=keras.initializers.Constant(w.running_mean.numpy()),
+            moving_variance_initializer=keras.initializers.Constant(w.running_var.numpy()),
+            epsilon=w.eps)
+
+    def call(self, inputs):
+        return self.bn(inputs)
+
+
+class TFPad(keras.layers.Layer):
+    # Pad inputs in spatial dimensions 1 and 2
+    def __init__(self, pad):
+        super().__init__()
+        if isinstance(pad, int):
+            self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]])
+        else:  # tuple/list
+            self.pad = tf.constant([[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]])
+
+    def call(self, inputs):
+        return tf.pad(inputs, self.pad, mode='constant', constant_values=0)
+
+
+class TFConv(keras.layers.Layer):
+    # Standard convolution
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
+        # ch_in, ch_out, weights, kernel, stride, padding, groups
+        super().__init__()
+        assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
+        # TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding)
+        # see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch
+        conv = keras.layers.Conv2D(
+            filters=c2,
+            kernel_size=k,
+            strides=s,
+            padding='SAME' if s == 1 else 'VALID',
+            use_bias=not hasattr(w, 'bn'),
+            kernel_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
+            bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy()))
+        self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
+        self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity
+        self.act = activations(w.act) if act else tf.identity
+
+    def call(self, inputs):
+        return self.act(self.bn(self.conv(inputs)))
+
+
+class TFDWConv(keras.layers.Layer):
+    # Depthwise convolution
+    def __init__(self, c1, c2, k=1, s=1, p=None, act=True, w=None):
+        # ch_in, ch_out, weights, kernel, stride, padding, groups
+        super().__init__()
+        assert c2 % c1 == 0, f'TFDWConv() output={c2} must be a multiple of input={c1} channels'
+        conv = keras.layers.DepthwiseConv2D(
+            kernel_size=k,
+            depth_multiplier=c2 // c1,
+            strides=s,
+            padding='SAME' if s == 1 else 'VALID',
+            use_bias=not hasattr(w, 'bn'),
+            depthwise_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
+            bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy()))
+        self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
+        self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity
+        self.act = activations(w.act) if act else tf.identity
+
+    def call(self, inputs):
+        return self.act(self.bn(self.conv(inputs)))
+
+
+class TFDWConvTranspose2d(keras.layers.Layer):
+    # Depthwise ConvTranspose2d
+    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0, w=None):
+        # ch_in, ch_out, weights, kernel, stride, padding, groups
+        super().__init__()
+        assert c1 == c2, f'TFDWConv() output={c2} must be equal to input={c1} channels'
+        assert k == 4 and p1 == 1, 'TFDWConv() only valid for k=4 and p1=1'
+        weight, bias = w.weight.permute(2, 3, 1, 0).numpy(), w.bias.numpy()
+        self.c1 = c1
+        self.conv = [
+            keras.layers.Conv2DTranspose(filters=1,
+                                         kernel_size=k,
+                                         strides=s,
+                                         padding='VALID',
+                                         output_padding=p2,
+                                         use_bias=True,
+                                         kernel_initializer=keras.initializers.Constant(weight[..., i:i + 1]),
+                                         bias_initializer=keras.initializers.Constant(bias[i])) for i in range(c1)]
+
+    def call(self, inputs):
+        return tf.concat([m(x) for m, x in zip(self.conv, tf.split(inputs, self.c1, 3))], 3)[:, 1:-1, 1:-1]
+
+
+class TFFocus(keras.layers.Layer):
+    # Focus wh information into c-space
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
+        # ch_in, ch_out, kernel, stride, padding, groups
+        super().__init__()
+        self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)
+
+    def call(self, inputs):  # x(b,w,h,c) -> y(b,w/2,h/2,4c)
+        # inputs = inputs / 255  # normalize 0-255 to 0-1
+        inputs = [inputs[:, ::2, ::2, :], inputs[:, 1::2, ::2, :], inputs[:, ::2, 1::2, :], inputs[:, 1::2, 1::2, :]]
+        return self.conv(tf.concat(inputs, 3))
+
+
+class TFBottleneck(keras.layers.Layer):
+    # Standard bottleneck
+    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None):  # ch_in, ch_out, shortcut, groups, expansion
+        super().__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
+        self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2)
+        self.add = shortcut and c1 == c2
+
+    def call(self, inputs):
+        return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
+
+
+class TFCrossConv(keras.layers.Layer):
+    # Cross Convolution
+    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False, w=None):
+        super().__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = TFConv(c1, c_, (1, k), (1, s), w=w.cv1)
+        self.cv2 = TFConv(c_, c2, (k, 1), (s, 1), g=g, w=w.cv2)
+        self.add = shortcut and c1 == c2
+
+    def call(self, inputs):
+        return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
+
+
+class TFConv2d(keras.layers.Layer):
+    # Substitution for PyTorch nn.Conv2D
+    def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
+        super().__init__()
+        assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
+        self.conv = keras.layers.Conv2D(filters=c2,
+                                        kernel_size=k,
+                                        strides=s,
+                                        padding='VALID',
+                                        use_bias=bias,
+                                        kernel_initializer=keras.initializers.Constant(
+                                            w.weight.permute(2, 3, 1, 0).numpy()),
+                                        bias_initializer=keras.initializers.Constant(w.bias.numpy()) if bias else None)
+
+    def call(self, inputs):
+        return self.conv(inputs)
+
+
+class TFBottleneckCSP(keras.layers.Layer):
+    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
+        # ch_in, ch_out, number, shortcut, groups, expansion
+        super().__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
+        self.cv2 = TFConv2d(c1, c_, 1, 1, bias=False, w=w.cv2)
+        self.cv3 = TFConv2d(c_, c_, 1, 1, bias=False, w=w.cv3)
+        self.cv4 = TFConv(2 * c_, c2, 1, 1, w=w.cv4)
+        self.bn = TFBN(w.bn)
+        self.act = lambda x: keras.activations.swish(x)
+        self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
+
+    def call(self, inputs):
+        y1 = self.cv3(self.m(self.cv1(inputs)))
+        y2 = self.cv2(inputs)
+        return self.cv4(self.act(self.bn(tf.concat((y1, y2), axis=3))))
+
+
+class TFC3(keras.layers.Layer):
+    # CSP Bottleneck with 3 convolutions
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
+        # ch_in, ch_out, number, shortcut, groups, expansion
+        super().__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
+        self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
+        self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
+        self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
+
+    def call(self, inputs):
+        return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
+
+
+class TFC3x(keras.layers.Layer):
+    # 3 module with cross-convolutions
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
+        # ch_in, ch_out, number, shortcut, groups, expansion
+        super().__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
+        self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
+        self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
+        self.m = keras.Sequential([
+            TFCrossConv(c_, c_, k=3, s=1, g=g, e=1.0, shortcut=shortcut, w=w.m[j]) for j in range(n)])
+
+    def call(self, inputs):
+        return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
+
+
+class TFSPP(keras.layers.Layer):
+    # Spatial pyramid pooling layer used in YOLOv3-SPP
+    def __init__(self, c1, c2, k=(5, 9, 13), w=None):
+        super().__init__()
+        c_ = c1 // 2  # hidden channels
+        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
+        self.cv2 = TFConv(c_ * (len(k) + 1), c2, 1, 1, w=w.cv2)
+        self.m = [keras.layers.MaxPool2D(pool_size=x, strides=1, padding='SAME') for x in k]
+
+    def call(self, inputs):
+        x = self.cv1(inputs)
+        return self.cv2(tf.concat([x] + [m(x) for m in self.m], 3))
+
+
+class TFSPPF(keras.layers.Layer):
+    # Spatial pyramid pooling-Fast layer
+    def __init__(self, c1, c2, k=5, w=None):
+        super().__init__()
+        c_ = c1 // 2  # hidden channels
+        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
+        self.cv2 = TFConv(c_ * 4, c2, 1, 1, w=w.cv2)
+        self.m = keras.layers.MaxPool2D(pool_size=k, strides=1, padding='SAME')
+
+    def call(self, inputs):
+        x = self.cv1(inputs)
+        y1 = self.m(x)
+        y2 = self.m(y1)
+        return self.cv2(tf.concat([x, y1, y2, self.m(y2)], 3))
+
+
+class TFDetect(keras.layers.Layer):
+    # TF YOLOv5 Detect layer
+    def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None):  # detection layer
+        super().__init__()
+        self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32)
+        self.nc = nc  # number of classes
+        self.no = nc + 5  # number of outputs per anchor
+        self.nl = len(anchors)  # number of detection layers
+        self.na = len(anchors[0]) // 2  # number of anchors
+        self.grid = [tf.zeros(1)] * self.nl  # init grid
+        self.anchors = tf.convert_to_tensor(w.anchors.numpy(), dtype=tf.float32)
+        self.anchor_grid = tf.reshape(self.anchors * tf.reshape(self.stride, [self.nl, 1, 1]), [self.nl, 1, -1, 1, 2])
+        self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)]
+        self.training = False  # set to False after building model
+        self.imgsz = imgsz
+        for i in range(self.nl):
+            ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
+            self.grid[i] = self._make_grid(nx, ny)
+
+    def call(self, inputs):
+        z = []  # inference output
+        x = []
+        for i in range(self.nl):
+            x.append(self.m[i](inputs[i]))
+            # x(bs,20,20,255) to x(bs,3,20,20,85)
+            ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
+            x[i] = tf.reshape(x[i], [-1, ny * nx, self.na, self.no])
+
+            if not self.training:  # inference
+                y = x[i]
+                grid = tf.transpose(self.grid[i], [0, 2, 1, 3]) - 0.5
+                anchor_grid = tf.transpose(self.anchor_grid[i], [0, 2, 1, 3]) * 4
+                xy = (tf.sigmoid(y[..., 0:2]) * 2 + grid) * self.stride[i]  # xy
+                wh = tf.sigmoid(y[..., 2:4]) ** 2 * anchor_grid
+                # Normalize xywh to 0-1 to reduce calibration error
+                xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
+                wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
+                y = tf.concat([xy, wh, tf.sigmoid(y[..., 4:5 + self.nc]), y[..., 5 + self.nc:]], -1)
+                z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no]))
+
+        return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1),)
+
+    @staticmethod
+    def _make_grid(nx=20, ny=20):
+        # yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
+        # return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
+        xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny))
+        return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32)
+
+
+class TFSegment(TFDetect):
+    # YOLOv5 Segment head for segmentation models
+    def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None):
+        super().__init__(nc, anchors, ch, imgsz, w)
+        self.nm = nm  # number of masks
+        self.npr = npr  # number of protos
+        self.no = 5 + nc + self.nm  # number of outputs per anchor
+        self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)]  # output conv
+        self.proto = TFProto(ch[0], self.npr, self.nm, w=w.proto)  # protos
+        self.detect = TFDetect.call
+
+    def call(self, x):
+        p = self.proto(x[0])
+        # p = TFUpsample(None, scale_factor=4, mode='nearest')(self.proto(x[0]))  # (optional) full-size protos
+        p = tf.transpose(p, [0, 3, 1, 2])  # from shape(1,160,160,32) to shape(1,32,160,160)
+        x = self.detect(self, x)
+        return (x, p) if self.training else (x[0], p)
+
+
+class TFProto(keras.layers.Layer):
+
+    def __init__(self, c1, c_=256, c2=32, w=None):
+        super().__init__()
+        self.cv1 = TFConv(c1, c_, k=3, w=w.cv1)
+        self.upsample = TFUpsample(None, scale_factor=2, mode='nearest')
+        self.cv2 = TFConv(c_, c_, k=3, w=w.cv2)
+        self.cv3 = TFConv(c_, c2, w=w.cv3)
+
+    def call(self, inputs):
+        return self.cv3(self.cv2(self.upsample(self.cv1(inputs))))
+
+
+class TFUpsample(keras.layers.Layer):
+    # TF version of torch.nn.Upsample()
+    def __init__(self, size, scale_factor, mode, w=None):  # warning: all arguments needed including 'w'
+        super().__init__()
+        assert scale_factor % 2 == 0, 'scale_factor must be multiple of 2'
+        self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * scale_factor, x.shape[2] * scale_factor), mode)
+        # self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode)
+        # with default arguments: align_corners=False, half_pixel_centers=False
+        # self.upsample = lambda x: tf.raw_ops.ResizeNearestNeighbor(images=x,
+        #                                                            size=(x.shape[1] * 2, x.shape[2] * 2))
+
+    def call(self, inputs):
+        return self.upsample(inputs)
+
+
+class TFConcat(keras.layers.Layer):
+    # TF version of torch.concat()
+    def __init__(self, dimension=1, w=None):
+        super().__init__()
+        assert dimension == 1, 'convert only NCHW to NHWC concat'
+        self.d = 3
+
+    def call(self, inputs):
+        return tf.concat(inputs, self.d)
+
+
+def parse_model(d, ch, model, imgsz):  # model_dict, input_channels(3)
+    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10}  {'module':<40}{'arguments':<30}")
+    anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
+    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
+    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)
+
+    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
+    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
+        m_str = m
+        m = eval(m) if isinstance(m, str) else m  # eval strings
+        for j, a in enumerate(args):
+            try:
+                args[j] = eval(a) if isinstance(a, str) else a  # eval strings
+            except NameError:
+                pass
+
+        n = max(round(n * gd), 1) if n > 1 else n  # depth gain
+        if m in [
+                nn.Conv2d, Conv, DWConv, DWConvTranspose2d, Bottleneck, SPP, SPPF, MixConv2d, Focus, CrossConv,
+                BottleneckCSP, C3, C3x]:
+            c1, c2 = ch[f], args[0]
+            c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
+
+            args = [c1, c2, *args[1:]]
+            if m in [BottleneckCSP, C3, C3x]:
+                args.insert(2, n)
+                n = 1
+        elif m is nn.BatchNorm2d:
+            args = [ch[f]]
+        elif m is Concat:
+            c2 = sum(ch[-1 if x == -1 else x + 1] for x in f)
+        elif m in [Detect, Segment]:
+            args.append([ch[x + 1] for x in f])
+            if isinstance(args[1], int):  # number of anchors
+                args[1] = [list(range(args[1] * 2))] * len(f)
+            if m is Segment:
+                args[3] = make_divisible(args[3] * gw, 8)
+            args.append(imgsz)
+        else:
+            c2 = ch[f]
+
+        tf_m = eval('TF' + m_str.replace('nn.', ''))
+        m_ = keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)]) if n > 1 \
+            else tf_m(*args, w=model.model[i])  # module
+
+        torch_m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
+        t = str(m)[8:-2].replace('__main__.', '')  # module type
+        np = sum(x.numel() for x in torch_m_.parameters())  # number params
+        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
+        LOGGER.info(f'{i:>3}{str(f):>18}{str(n):>3}{np:>10}  {t:<40}{str(args):<30}')  # print
+        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
+        layers.append(m_)
+        ch.append(c2)
+    return keras.Sequential(layers), sorted(save)
+
+
+class TFModel:
+    # TF YOLOv5 model
+    def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, model=None, imgsz=(640, 640)):  # model, channels, classes
+        super().__init__()
+        if isinstance(cfg, dict):
+            self.yaml = cfg  # model dict
+        else:  # is *.yaml
+            import yaml  # for torch hub
+            self.yaml_file = Path(cfg).name
+            with open(cfg) as f:
+                self.yaml = yaml.load(f, Loader=yaml.FullLoader)  # model dict
+
+        # Define model
+        if nc and nc != self.yaml['nc']:
+            LOGGER.info(f"Overriding {cfg} nc={self.yaml['nc']} with nc={nc}")
+            self.yaml['nc'] = nc  # override yaml value
+        self.model, self.savelist = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz)
+
+    def predict(self,
+                inputs,
+                tf_nms=False,
+                agnostic_nms=False,
+                topk_per_class=100,
+                topk_all=100,
+                iou_thres=0.45,
+                conf_thres=0.25):
+        y = []  # outputs
+        x = inputs
+        for m in self.model.layers:
+            if m.f != -1:  # if not from previous layer
+                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers
+
+            x = m(x)  # run
+            y.append(x if m.i in self.savelist else None)  # save output
+
+        # Add TensorFlow NMS
+        if tf_nms:
+            boxes = self._xywh2xyxy(x[0][..., :4])
+            probs = x[0][:, :, 4:5]
+            classes = x[0][:, :, 5:]
+            scores = probs * classes
+            if agnostic_nms:
+                nms = AgnosticNMS()((boxes, classes, scores), topk_all, iou_thres, conf_thres)
+            else:
+                boxes = tf.expand_dims(boxes, 2)
+                nms = tf.image.combined_non_max_suppression(boxes,
+                                                            scores,
+                                                            topk_per_class,
+                                                            topk_all,
+                                                            iou_thres,
+                                                            conf_thres,
+                                                            clip_boxes=False)
+            return (nms,)
+        return x  # output [1,6300,85] = [xywh, conf, class0, class1, ...]
+        # x = x[0]  # [x(1,6300,85), ...] to x(6300,85)
+        # xywh = x[..., :4]  # x(6300,4) boxes
+        # conf = x[..., 4:5]  # x(6300,1) confidences
+        # cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1))  # x(6300,1)  classes
+        # return tf.concat([conf, cls, xywh], 1)
+
+    @staticmethod
+    def _xywh2xyxy(xywh):
+        # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
+        x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1)
+        return tf.concat([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=-1)
+
+
+class AgnosticNMS(keras.layers.Layer):
+    # TF Agnostic NMS
+    def call(self, input, topk_all, iou_thres, conf_thres):
+        # wrap map_fn to avoid TypeSpec related error https://stackoverflow.com/a/65809989/3036450
+        return tf.map_fn(lambda x: self._nms(x, topk_all, iou_thres, conf_thres),
+                         input,
+                         fn_output_signature=(tf.float32, tf.float32, tf.float32, tf.int32),
+                         name='agnostic_nms')
+
+    @staticmethod
+    def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25):  # agnostic NMS
+        boxes, classes, scores = x
+        class_inds = tf.cast(tf.argmax(classes, axis=-1), tf.float32)
+        scores_inp = tf.reduce_max(scores, -1)
+        selected_inds = tf.image.non_max_suppression(boxes,
+                                                     scores_inp,
+                                                     max_output_size=topk_all,
+                                                     iou_threshold=iou_thres,
+                                                     score_threshold=conf_thres)
+        selected_boxes = tf.gather(boxes, selected_inds)
+        padded_boxes = tf.pad(selected_boxes,
+                              paddings=[[0, topk_all - tf.shape(selected_boxes)[0]], [0, 0]],
+                              mode='CONSTANT',
+                              constant_values=0.0)
+        selected_scores = tf.gather(scores_inp, selected_inds)
+        padded_scores = tf.pad(selected_scores,
+                               paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
+                               mode='CONSTANT',
+                               constant_values=-1.0)
+        selected_classes = tf.gather(class_inds, selected_inds)
+        padded_classes = tf.pad(selected_classes,
+                                paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
+                                mode='CONSTANT',
+                                constant_values=-1.0)
+        valid_detections = tf.shape(selected_inds)[0]
+        return padded_boxes, padded_scores, padded_classes, valid_detections
+
+
+def activations(act=nn.SiLU):
+    # Returns TF activation from input PyTorch activation
+    if isinstance(act, nn.LeakyReLU):
+        return lambda x: keras.activations.relu(x, alpha=0.1)
+    elif isinstance(act, nn.Hardswish):
+        return lambda x: x * tf.nn.relu6(x + 3) * 0.166666667
+    elif isinstance(act, (nn.SiLU, SiLU)):
+        return lambda x: keras.activations.swish(x)
+    else:
+        raise Exception(f'no matching TensorFlow activation found for PyTorch activation {act}')
+
+
+def representative_dataset_gen(dataset, ncalib=100):
+    # Representative dataset generator for use with converter.representative_dataset, returns a generator of np arrays
+    for n, (path, img, im0s, vid_cap, string) in enumerate(dataset):
+        im = np.transpose(img, [1, 2, 0])
+        im = np.expand_dims(im, axis=0).astype(np.float32)
+        im /= 255
+        yield [im]
+        if n >= ncalib:
+            break
+
+
+def run(
+        weights=ROOT / 'yolov5s.pt',  # weights path
+        imgsz=(640, 640),  # inference size h,w
+        batch_size=1,  # batch size
+        dynamic=False,  # dynamic batch size
+):
+    # PyTorch model
+    im = torch.zeros((batch_size, 3, *imgsz))  # BCHW image
+    model = attempt_load(weights, device=torch.device('cpu'), inplace=True, fuse=False)
+    _ = model(im)  # inference
+    model.info()
+
+    # TensorFlow model
+    im = tf.zeros((batch_size, *imgsz, 3))  # BHWC image
+    tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
+    _ = tf_model.predict(im)  # inference
+
+    # Keras model
+    im = keras.Input(shape=(*imgsz, 3), batch_size=None if dynamic else batch_size)
+    keras_model = keras.Model(inputs=im, outputs=tf_model.predict(im))
+    keras_model.summary()
+
+    LOGGER.info('PyTorch, TensorFlow and Keras models successfully verified.\nUse export.py for TF model export.')
+
+
+def parse_opt():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='weights path')
+    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
+    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
+    parser.add_argument('--dynamic', action='store_true', help='dynamic batch size')
+    opt = parser.parse_args()
+    opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1  # expand
+    print_args(vars(opt))
+    return opt
+
+
+def main(opt):
+    run(**vars(opt))
+
+
+if __name__ == '__main__':
+    opt = parse_opt()
+    main(opt)
diff --git a/models/yolo.py b/models/yolo.py
index cbff70fc83d4..ed21c067ee93 100644
--- a/models/yolo.py
+++ b/models/yolo.py
@@ -1,150 +1,254 @@
-# YOLOv5 YOLO-specific modules
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+YOLO-specific modules
+
+Usage:
+    $ python models/yolo.py --cfg yolov5s.yaml
+"""
 
 import argparse
-import logging
+import contextlib
+import os
+import platform
 import sys
 from copy import deepcopy
 from pathlib import Path
 
-sys.path.append(Path(__file__).parent.parent.absolute().__str__())  # to run '$ python *.py' files in subdirectories
-logger = logging.getLogger(__name__)
+FILE = Path(__file__).resolve()
+ROOT = FILE.parents[1]  # YOLOv5 root directory
+if str(ROOT) not in sys.path:
+    sys.path.append(str(ROOT))  # add ROOT to PATH
+if platform.system() != 'Windows':
+    ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
 
 from models.common import *
 from models.experimental import *
 from utils.autoanchor import check_anchor_order
-from utils.general import make_divisible, check_file, set_logging
-from utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \
-    select_device, copy_attr
+from utils.general import LOGGER, check_version, check_yaml, make_divisible, print_args
+from utils.plots import feature_visualization
+from utils.torch_utils import (fuse_conv_and_bn, initialize_weights, model_info, profile, scale_img, select_device,
+                               time_sync)
 
 try:
-    import thop  # for FLOPS computation
+    import thop  # for FLOPs computation
 except ImportError:
     thop = None
 
 
 class Detect(nn.Module):
+    # YOLOv5 Detect head for detection models
     stride = None  # strides computed during build
-    export = False  # onnx export
+    dynamic = False  # force grid reconstruction
+    export = False  # export mode
 
-    def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
-        super(Detect, self).__init__()
+    def __init__(self, nc=80, anchors=(), ch=(), inplace=True):  # detection layer
+        super().__init__()
         self.nc = nc  # number of classes
         self.no = nc + 5  # number of outputs per anchor
         self.nl = len(anchors)  # number of detection layers
         self.na = len(anchors[0]) // 2  # number of anchors
-        self.grid = [torch.zeros(1)] * self.nl  # init grid
-        a = torch.tensor(anchors).float().view(self.nl, -1, 2)
-        self.register_buffer('anchors', a)  # shape(nl,na,2)
-        self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
+        self.grid = [torch.empty(0) for _ in range(self.nl)]  # init grid
+        self.anchor_grid = [torch.empty(0) for _ in range(self.nl)]  # init anchor grid
+        self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2))  # shape(nl,na,2)
         self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
+        self.inplace = inplace  # use inplace ops (e.g. slice assignment)
 
     def forward(self, x):
-        # x = x.copy()  # for profiling
         z = []  # inference output
-        self.training |= self.export
         for i in range(self.nl):
             x[i] = self.m[i](x[i])  # conv
             bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
             x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
 
             if not self.training:  # inference
-                if self.grid[i].shape[2:4] != x[i].shape[2:4]:
-                    self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
+                if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
+                    self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)
+
+                if isinstance(self, Segment):  # (boxes + masks)
+                    xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
+                    xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i]  # xy
+                    wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i]  # wh
+                    y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
+                else:  # Detect (boxes only)
+                    xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
+                    xy = (xy * 2 + self.grid[i]) * self.stride[i]  # xy
+                    wh = (wh * 2) ** 2 * self.anchor_grid[i]  # wh
+                    y = torch.cat((xy, wh, conf), 4)
+                z.append(y.view(bs, self.na * nx * ny, self.no))
+
+        return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)
+
+    def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version__, '1.10.0')):
+        d = self.anchors[i].device
+        t = self.anchors[i].dtype
+        shape = 1, self.na, ny, nx, 2  # grid shape
+        y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t)
+        yv, xv = torch.meshgrid(y, x, indexing='ij') if torch_1_10 else torch.meshgrid(y, x)  # torch>=0.7 compatibility
+        grid = torch.stack((xv, yv), 2).expand(shape) - 0.5  # add grid offset, i.e. y = 2.0 * x - 0.5
+        anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape)
+        return grid, anchor_grid
+
+
+class Segment(Detect):
+    # YOLOv5 Segment head for segmentation models
+    def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
+        super().__init__(nc, anchors, ch, inplace)
+        self.nm = nm  # number of masks
+        self.npr = npr  # number of protos
+        self.no = 5 + nc + self.nm  # number of outputs per anchor
+        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
+        self.proto = Proto(ch[0], self.npr, self.nm)  # protos
+        self.detect = Detect.forward
 
-                y = x[i].sigmoid()
-                y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
-                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
-                z.append(y.view(bs, -1, self.no))
+    def forward(self, x):
+        p = self.proto(x[0])
+        x = self.detect(self, x)
+        return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1])
 
-        return x if self.training else (torch.cat(z, 1), x)
 
-    @staticmethod
-    def _make_grid(nx=20, ny=20):
-        yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
-        return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
+class BaseModel(nn.Module):
+    # YOLOv5 base model
+    def forward(self, x, profile=False, visualize=False):
+        return self._forward_once(x, profile, visualize)  # single-scale inference, train
 
+    def _forward_once(self, x, profile=False, visualize=False):
+        y, dt = [], []  # outputs
+        for m in self.model:
+            if m.f != -1:  # if not from previous layer
+                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers
+            if profile:
+                self._profile_one_layer(m, x, dt)
+            x = m(x)  # run
+            y.append(x if m.i in self.save else None)  # save output
+            if visualize:
+                feature_visualization(x, m.type, m.i, save_dir=visualize)
+        return x
+
+    def _profile_one_layer(self, m, x, dt):
+        c = m == self.model[-1]  # is final layer, copy input as inplace fix
+        o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0  # FLOPs
+        t = time_sync()
+        for _ in range(10):
+            m(x.copy() if c else x)
+        dt.append((time_sync() - t) * 100)
+        if m == self.model[0]:
+            LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s}  module")
+        LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f}  {m.type}')
+        if c:
+            LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s}  Total")
 
-class Model(nn.Module):
+    def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
+        LOGGER.info('Fusing layers... ')
+        for m in self.model.modules():
+            if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'):
+                m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
+                delattr(m, 'bn')  # remove batchnorm
+                m.forward = m.forward_fuse  # update forward
+        self.info()
+        return self
+
+    def info(self, verbose=False, img_size=640):  # print model information
+        model_info(self, verbose, img_size)
+
+    def _apply(self, fn):
+        # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
+        self = super()._apply(fn)
+        m = self.model[-1]  # Detect()
+        if isinstance(m, (Detect, Segment)):
+            m.stride = fn(m.stride)
+            m.grid = list(map(fn, m.grid))
+            if isinstance(m.anchor_grid, list):
+                m.anchor_grid = list(map(fn, m.anchor_grid))
+        return self
+
+
+class DetectionModel(BaseModel):
+    # YOLOv5 detection model
     def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None):  # model, input channels, number of classes
-        super(Model, self).__init__()
+        super().__init__()
         if isinstance(cfg, dict):
             self.yaml = cfg  # model dict
         else:  # is *.yaml
             import yaml  # for torch hub
             self.yaml_file = Path(cfg).name
-            with open(cfg) as f:
+            with open(cfg, encoding='ascii', errors='ignore') as f:
                 self.yaml = yaml.safe_load(f)  # model dict
 
         # Define model
         ch = self.yaml['ch'] = self.yaml.get('ch', ch)  # input channels
         if nc and nc != self.yaml['nc']:
-            logger.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
+            LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
             self.yaml['nc'] = nc  # override yaml value
         if anchors:
-            logger.info(f'Overriding model.yaml anchors with anchors={anchors}')
+            LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}')
             self.yaml['anchors'] = round(anchors)  # override yaml value
         self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch])  # model, savelist
         self.names = [str(i) for i in range(self.yaml['nc'])]  # default names
-        # logger.info([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))])
+        self.inplace = self.yaml.get('inplace', True)
 
         # Build strides, anchors
         m = self.model[-1]  # Detect()
-        if isinstance(m, Detect):
+        if isinstance(m, (Detect, Segment)):
             s = 256  # 2x min stride
-            m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))])  # forward
-            m.anchors /= m.stride.view(-1, 1, 1)
+            m.inplace = self.inplace
+            forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)
+            m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))])  # forward
             check_anchor_order(m)
+            m.anchors /= m.stride.view(-1, 1, 1)
             self.stride = m.stride
             self._initialize_biases()  # only run once
-            # logger.info('Strides: %s' % m.stride.tolist())
 
         # Init weights, biases
         initialize_weights(self)
         self.info()
-        logger.info('')
+        LOGGER.info('')
 
-    def forward(self, x, augment=False, profile=False):
+    def forward(self, x, augment=False, profile=False, visualize=False):
         if augment:
-            img_size = x.shape[-2:]  # height, width
-            s = [1, 0.83, 0.67]  # scales
-            f = [None, 3, None]  # flips (2-ud, 3-lr)
-            y = []  # outputs
-            for si, fi in zip(s, f):
-                xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
-                yi = self.forward_once(xi)[0]  # forward
-                # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1])  # save
-                yi[..., :4] /= si  # de-scale
-                if fi == 2:
-                    yi[..., 1] = img_size[0] - yi[..., 1]  # de-flip ud
-                elif fi == 3:
-                    yi[..., 0] = img_size[1] - yi[..., 0]  # de-flip lr
-                y.append(yi)
-            return torch.cat(y, 1), None  # augmented inference, train
+            return self._forward_augment(x)  # augmented inference, None
+        return self._forward_once(x, profile, visualize)  # single-scale inference, train
+
+    def _forward_augment(self, x):
+        img_size = x.shape[-2:]  # height, width
+        s = [1, 0.83, 0.67]  # scales
+        f = [None, 3, None]  # flips (2-ud, 3-lr)
+        y = []  # outputs
+        for si, fi in zip(s, f):
+            xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
+            yi = self._forward_once(xi)[0]  # forward
+            # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1])  # save
+            yi = self._descale_pred(yi, fi, si, img_size)
+            y.append(yi)
+        y = self._clip_augmented(y)  # clip augmented tails
+        return torch.cat(y, 1), None  # augmented inference, train
+
+    def _descale_pred(self, p, flips, scale, img_size):
+        # de-scale predictions following augmented inference (inverse operation)
+        if self.inplace:
+            p[..., :4] /= scale  # de-scale
+            if flips == 2:
+                p[..., 1] = img_size[0] - p[..., 1]  # de-flip ud
+            elif flips == 3:
+                p[..., 0] = img_size[1] - p[..., 0]  # de-flip lr
         else:
-            return self.forward_once(x, profile)  # single-scale inference, train
-
-    def forward_once(self, x, profile=False):
-        y, dt = [], []  # outputs
-        for m in self.model:
-            if m.f != -1:  # if not from previous layer
-                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers
-
-            if profile:
-                o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 if thop else 0  # FLOPS
-                t = time_synchronized()
-                for _ in range(10):
-                    _ = m(x)
-                dt.append((time_synchronized() - t) * 100)
-                if m == self.model[0]:
-                    logger.info(f"{'time (ms)':>10s} {'GFLOPS':>10s} {'params':>10s}  {'module'}")
-                logger.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f}  {m.type}')
-
-            x = m(x)  # run
-            y.append(x if m.i in self.save else None)  # save output
-
-        if profile:
-            logger.info('%.1fms total' % sum(dt))
-        return x
+            x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale  # de-scale
+            if flips == 2:
+                y = img_size[0] - y  # de-flip ud
+            elif flips == 3:
+                x = img_size[1] - x  # de-flip lr
+            p = torch.cat((x, y, wh, p[..., 4:]), -1)
+        return p
+
+    def _clip_augmented(self, y):
+        # Clip YOLOv5 augmented inference tails
+        nl = self.model[-1].nl  # number of detection layers (P3-P5)
+        g = sum(4 ** x for x in range(nl))  # grid points
+        e = 1  # exclude layer count
+        i = (y[0].shape[1] // g) * sum(4 ** x for x in range(e))  # indices
+        y[0] = y[0][:, :-i]  # large
+        i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e))  # indices
+        y[-1] = y[-1][:, i:]  # small
+        return y
 
     def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is class frequency
         # https://arxiv.org/abs/1708.02002 section 3.3
@@ -153,58 +257,52 @@ def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is
         for mi, s in zip(m.m, m.stride):  # from
             b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
             b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
-            b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())  # cls
+            b.data[:, 5:5 + m.nc] += math.log(0.6 / (m.nc - 0.99999)) if cf is None else torch.log(cf / cf.sum())  # cls
             mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
 
-    def _print_biases(self):
-        m = self.model[-1]  # Detect() module
-        for mi in m.m:  # from
-            b = mi.bias.detach().view(m.na, -1).T  # conv.bias(255) to (3,85)
-            logger.info(
-                ('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean()))
 
-    # def _print_weights(self):
-    #     for m in self.model.modules():
-    #         if type(m) is Bottleneck:
-    #             logger.info('%10.3g' % (m.w.detach().sigmoid() * 2))  # shortcut weights
+Model = DetectionModel  # retain YOLOv5 'Model' class for backwards compatibility
 
-    def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
-        logger.info('Fusing layers... ')
-        for m in self.model.modules():
-            if type(m) is Conv and hasattr(m, 'bn'):
-                m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
-                delattr(m, 'bn')  # remove batchnorm
-                m.forward = m.fuseforward  # update forward
-        self.info()
-        return self
 
-    def nms(self, mode=True):  # add or remove NMS module
-        present = type(self.model[-1]) is NMS  # last layer is NMS
-        if mode and not present:
-            logger.info('Adding NMS... ')
-            m = NMS()  # module
-            m.f = -1  # from
-            m.i = self.model[-1].i + 1  # index
-            self.model.add_module(name='%s' % m.i, module=m)  # add
-            self.eval()
-        elif not mode and present:
-            logger.info('Removing NMS... ')
-            self.model = self.model[:-1]  # remove
-        return self
+class SegmentationModel(DetectionModel):
+    # YOLOv5 segmentation model
+    def __init__(self, cfg='yolov5s-seg.yaml', ch=3, nc=None, anchors=None):
+        super().__init__(cfg, ch, nc, anchors)
 
-    def autoshape(self):  # add autoShape module
-        logger.info('Adding autoShape... ')
-        m = autoShape(self)  # wrap model
-        copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=())  # copy attributes
-        return m
 
-    def info(self, verbose=False, img_size=640):  # print model information
-        model_info(self, verbose, img_size)
+class ClassificationModel(BaseModel):
+    # YOLOv5 classification model
+    def __init__(self, cfg=None, model=None, nc=1000, cutoff=10):  # yaml, model, number of classes, cutoff index
+        super().__init__()
+        self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg)
+
+    def _from_detection_model(self, model, nc=1000, cutoff=10):
+        # Create a YOLOv5 classification model from a YOLOv5 detection model
+        if isinstance(model, DetectMultiBackend):
+            model = model.model  # unwrap DetectMultiBackend
+        model.model = model.model[:cutoff]  # backbone
+        m = model.model[-1]  # last layer
+        ch = m.conv.in_channels if hasattr(m, 'conv') else m.cv1.conv.in_channels  # ch into module
+        c = Classify(ch, nc)  # Classify()
+        c.i, c.f, c.type = m.i, m.f, 'models.common.Classify'  # index, from, type
+        model.model[-1] = c  # replace
+        self.model = model.model
+        self.stride = model.stride
+        self.save = []
+        self.nc = nc
+
+    def _from_yaml(self, cfg):
+        # Create a YOLOv5 classification model from a *.yaml file
+        self.model = None
 
 
 def parse_model(d, ch):  # model_dict, input_channels(3)
-    logger.info('\n%3s%18s%3s%10s  %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
-    anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
+    # Parse a YOLOv5 model.yaml dictionary
+    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10}  {'module':<40}{'arguments':<30}")
+    anchors, nc, gd, gw, act = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'], d.get('activation')
+    if act:
+        Conv.default_act = eval(act)  # redefine default activation, i.e. Conv.default_act = nn.SiLU()
+        LOGGER.info(f"{colorstr('activation:')} {act}")  # print
     na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
     no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)
 
@@ -212,30 +310,32 @@ def parse_model(d, ch):  # model_dict, input_channels(3)
     for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
         m = eval(m) if isinstance(m, str) else m  # eval strings
         for j, a in enumerate(args):
-            try:
+            with contextlib.suppress(NameError):
                 args[j] = eval(a) if isinstance(a, str) else a  # eval strings
-            except:
-                pass
 
-        n = max(round(n * gd), 1) if n > 1 else n  # depth gain
-        if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP,
-                 C3, C3TR]:
+        n = n_ = max(round(n * gd), 1) if n > 1 else n  # depth gain
+        if m in {
+                Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
+                BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x}:
             c1, c2 = ch[f], args[0]
             if c2 != no:  # if not output
                 c2 = make_divisible(c2 * gw, 8)
 
             args = [c1, c2, *args[1:]]
-            if m in [BottleneckCSP, C3, C3TR]:
+            if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}:
                 args.insert(2, n)  # number of repeats
                 n = 1
         elif m is nn.BatchNorm2d:
             args = [ch[f]]
         elif m is Concat:
-            c2 = sum([ch[x] for x in f])
-        elif m is Detect:
+            c2 = sum(ch[x] for x in f)
+        # TODO: channel, gw, gd
+        elif m in {Detect, Segment}:
             args.append([ch[x] for x in f])
             if isinstance(args[1], int):  # number of anchors
                 args[1] = [list(range(args[1] * 2))] * len(f)
+            if m is Segment:
+                args[3] = make_divisible(args[3] * gw, 8)
         elif m is Contract:
             c2 = ch[f] * args[0] ** 2
         elif m is Expand:
@@ -243,11 +343,11 @@ def parse_model(d, ch):  # model_dict, input_channels(3)
         else:
             c2 = ch[f]
 
-        m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args)  # module
+        m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
         t = str(m)[8:-2].replace('__main__.', '')  # module type
-        np = sum([x.numel() for x in m_.parameters()])  # number params
+        np = sum(x.numel() for x in m_.parameters())  # number params
         m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
-        logger.info('%3s%18s%3s%10.0f  %-40s%-30s' % (i, f, n, np, t, args))  # print
+        LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{np:10.0f}  {t:<40}{str(args):<30}')  # print
         save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
         layers.append(m_)
         if i == 0:
@@ -259,23 +359,33 @@ def parse_model(d, ch):  # model_dict, input_channels(3)
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
+    parser.add_argument('--batch-size', type=int, default=1, help='total batch size for all GPUs')
     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
+    parser.add_argument('--profile', action='store_true', help='profile model speed')
+    parser.add_argument('--line-profile', action='store_true', help='profile model speed layer by layer')
+    parser.add_argument('--test', action='store_true', help='test all yolo*.yaml')
     opt = parser.parse_args()
-    opt.cfg = check_file(opt.cfg)  # check file
-    set_logging()
+    opt.cfg = check_yaml(opt.cfg)  # check YAML
+    print_args(vars(opt))
     device = select_device(opt.device)
 
     # Create model
+    im = torch.rand(opt.batch_size, 3, 640, 640).to(device)
     model = Model(opt.cfg).to(device)
-    model.train()
-
-    # Profile
-    # img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 320, 320).to(device)
-    # y = model(img, profile=True)
-
-    # Tensorboard (not working https://github.com/ultralytics/yolov5/issues/2898)
-    # from torch.utils.tensorboard import SummaryWriter
-    # tb_writer = SummaryWriter('.')
-    # logger.info("Run 'tensorboard --logdir=models' to view tensorboard at http://localhost:6006/")
-    # tb_writer.add_graph(torch.jit.trace(model, img, strict=False), [])  # add model graph
-    # tb_writer.add_image('test', img[0], dataformats='CWH')  # add model to tensorboard
+
+    # Options
+    if opt.line_profile:  # profile layer by layer
+        model(im, profile=True)
+
+    elif opt.profile:  # profile forward-backward
+        results = profile(input=im, ops=[model], n=3)
+
+    elif opt.test:  # test all models
+        for cfg in Path(ROOT / 'models').rglob('yolo*.yaml'):
+            try:
+                _ = Model(cfg)
+            except Exception as e:
+                print(f'Error in {cfg}: {e}')
+
+    else:  # report fused model summary
+        model.fuse()
diff --git a/models/yolov5l.yaml b/models/yolov5l.yaml
index 71ebf86e5791..ce8a5de46a27 100644
--- a/models/yolov5l.yaml
+++ b/models/yolov5l.yaml
@@ -1,30 +1,30 @@
-# parameters
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
 nc: 80  # number of classes
 depth_multiple: 1.0  # model depth multiple
 width_multiple: 1.0  # layer channel multiple
-
-# anchors
 anchors:
   - [10,13, 16,30, 33,23]  # P3/8
   - [30,61, 62,45, 59,119]  # P4/16
   - [116,90, 156,198, 373,326]  # P5/32
 
-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
 backbone:
   # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
    [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
    [-1, 3, C3, [128]],
    [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-   [-1, 9, C3, [256]],
+   [-1, 6, C3, [256]],
    [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
    [-1, 9, C3, [512]],
    [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-   [-1, 1, SPP, [1024, [5, 9, 13]]],
-   [-1, 3, C3, [1024, False]],  # 9
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
   ]
 
-# YOLOv5 head
+# YOLOv5 v6.0 head
 head:
   [[-1, 1, Conv, [512, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
diff --git a/models/yolov5m.yaml b/models/yolov5m.yaml
index 3c749c916246..ad13ab370ff6 100644
--- a/models/yolov5m.yaml
+++ b/models/yolov5m.yaml
@@ -1,30 +1,30 @@
-# parameters
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
 nc: 80  # number of classes
 depth_multiple: 0.67  # model depth multiple
 width_multiple: 0.75  # layer channel multiple
-
-# anchors
 anchors:
   - [10,13, 16,30, 33,23]  # P3/8
   - [30,61, 62,45, 59,119]  # P4/16
   - [116,90, 156,198, 373,326]  # P5/32
 
-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
 backbone:
   # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
    [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
    [-1, 3, C3, [128]],
    [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-   [-1, 9, C3, [256]],
+   [-1, 6, C3, [256]],
    [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
    [-1, 9, C3, [512]],
    [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-   [-1, 1, SPP, [1024, [5, 9, 13]]],
-   [-1, 3, C3, [1024, False]],  # 9
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
   ]
 
-# YOLOv5 head
+# YOLOv5 v6.0 head
 head:
   [[-1, 1, Conv, [512, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
diff --git a/models/yolov5n.yaml b/models/yolov5n.yaml
new file mode 100644
index 000000000000..8a28a40d6e20
--- /dev/null
+++ b/models/yolov5n.yaml
@@ -0,0 +1,48 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
+nc: 80  # number of classes
+depth_multiple: 0.33  # model depth multiple
+width_multiple: 0.25  # layer channel multiple
+anchors:
+  - [10,13, 16,30, 33,23]  # P3/8
+  - [30,61, 62,45, 59,119]  # P4/16
+  - [116,90, 156,198, 373,326]  # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, C3, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
+   [-1, 6, C3, [256]],
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
+   [-1, 9, C3, [512]],
+   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
+  ]
+
+# YOLOv5 v6.0 head
+head:
+  [[-1, 1, Conv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, C3, [512, False]],  # 13
+
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
+   [-1, 3, C3, [256, False]],  # 17 (P3/8-small)
+
+   [-1, 1, Conv, [256, 3, 2]],
+   [[-1, 14], 1, Concat, [1]],  # cat head P4
+   [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
+
+   [-1, 1, Conv, [512, 3, 2]],
+   [[-1, 10], 1, Concat, [1]],  # cat head P5
+   [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)
+
+   [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
+  ]
diff --git a/models/yolov5s.yaml b/models/yolov5s.yaml
index aca669d60d8b..f35beabb1e1c 100644
--- a/models/yolov5s.yaml
+++ b/models/yolov5s.yaml
@@ -1,30 +1,30 @@
-# parameters
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
 nc: 80  # number of classes
 depth_multiple: 0.33  # model depth multiple
 width_multiple: 0.50  # layer channel multiple
-
-# anchors
 anchors:
   - [10,13, 16,30, 33,23]  # P3/8
   - [30,61, 62,45, 59,119]  # P4/16
   - [116,90, 156,198, 373,326]  # P5/32
 
-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
 backbone:
   # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
    [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
    [-1, 3, C3, [128]],
    [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-   [-1, 9, C3, [256]],
+   [-1, 6, C3, [256]],
    [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
    [-1, 9, C3, [512]],
    [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-   [-1, 1, SPP, [1024, [5, 9, 13]]],
-   [-1, 3, C3, [1024, False]],  # 9
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
   ]
 
-# YOLOv5 head
+# YOLOv5 v6.0 head
 head:
   [[-1, 1, Conv, [512, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
diff --git a/models/yolov5x.yaml b/models/yolov5x.yaml
index d3babdf7baf0..f617a027d8a2 100644
--- a/models/yolov5x.yaml
+++ b/models/yolov5x.yaml
@@ -1,30 +1,30 @@
-# parameters
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
 nc: 80  # number of classes
 depth_multiple: 1.33  # model depth multiple
 width_multiple: 1.25  # layer channel multiple
-
-# anchors
 anchors:
   - [10,13, 16,30, 33,23]  # P3/8
   - [30,61, 62,45, 59,119]  # P4/16
   - [116,90, 156,198, 373,326]  # P5/32
 
-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
 backbone:
   # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
    [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
    [-1, 3, C3, [128]],
    [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-   [-1, 9, C3, [256]],
+   [-1, 6, C3, [256]],
    [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
    [-1, 9, C3, [512]],
    [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-   [-1, 1, SPP, [1024, [5, 9, 13]]],
-   [-1, 3, C3, [1024, False]],  # 9
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
   ]
 
-# YOLOv5 head
+# YOLOv5 v6.0 head
 head:
   [[-1, 1, Conv, [512, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
diff --git a/requirements.txt b/requirements.txt
old mode 100755
new mode 100644
index 94d179bfd9d3..651ebccf87aa
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,19 +4,47 @@ supervisely==6.69.70
 opencv-python-headless==4.5.5.62
 opencv-python==4.5.5.62
 
-# base ----------------------------------------
-PyYAML==5.4.1
-tensorboard==2.4.1
+# Base ------------------------------------------------------------------------
+gitpython>=3.1.30
+matplotlib>=3.2.2
+numpy>=1.18.5
+Pillow>=7.1.2
+psutil  # system resources
+PyYAML>=5.3.1
+requests>=2.23.0
+scipy>=1.4.1
+thop>=0.1.1  # FLOPs computation
+tqdm>=4.64.0
+# protobuf<=3.20.1  # https://github.com/ultralytics/yolov5/issues/8012
 
-# plotting ------------------------------------
-seaborn==0.11.1
+# Logging ---------------------------------------------------------------------
+tensorboard>=2.4.1
+# clearml>=1.2.0
+# comet
 
-# export --------------------------------------
-coremltools==4.1
-onnx>=1.8.0
-onnxruntime==1.8.0
-# scikit-learn==1.1.1  # for coreml quantization
-numpy==1.19.0
-# extras --------------------------------------
-thop==0.0.31-2005241907  # FLOPS computation
-pycocotools==2.0.2  # COCO mAP
+# Plotting --------------------------------------------------------------------
+pandas>=1.1.4
+seaborn>=0.11.0
+
+# Export ----------------------------------------------------------------------
+coremltools>=6.0  # CoreML export
+onnx>=1.12.0  # ONNX export
+onnx-simplifier>=0.4.1  # ONNX simplifier
+# nvidia-pyindex  # TensorRT export
+# nvidia-tensorrt  # TensorRT export
+# scikit-learn<=1.1.2  # CoreML quantization
+# tensorflow>=2.4.1  # TF exports (-cpu, -aarch64, -macos)
+# tensorflowjs>=3.9.0  # TF.js export
+# openvino-dev  # OpenVINO export
+
+# Deploy ----------------------------------------------------------------------
+setuptools>=65.5.1 # Snyk vulnerability fix
+# tritonclient[all]~=2.24.0
+
+# Extras ----------------------------------------------------------------------
+# ipython  # interactive notebook
+# mss  # screenshots
+# albumentations>=1.0.3
+pycocotools>=2.0.6  # COCO mAP
+# roboflow
+# ultralytics  # HUB https://hub.ultralytics.com
diff --git a/supervisely/serve/src/main.py b/supervisely/serve/src/main.py
index 09caa686d60a..14c38a4f8a37 100644
--- a/supervisely/serve/src/main.py
+++ b/supervisely/serve/src/main.py
@@ -1,21 +1,26 @@
 import os
 import sys
+
 try:
     from typing import Literal
 except:
     from typing_extensions import Literal
-from typing import List, Dict, Any
+
+from pathlib import Path
+from typing import Any, Dict, List
+
+import numpy as np
+import torch
 import yaml
 from dotenv import load_dotenv
-import torch
-import numpy as np
-import supervisely as sly
 from supervisely.geometry.sliding_windows_fuzzy import SlidingWindowsFuzzy
-from utils.torch_utils import select_device
+
+import supervisely as sly
 from models.experimental import attempt_load
-from utils.general import check_img_size, non_max_suppression, scale_coords, xywh2xyxy
-from utils.datasets import letterbox
-from pathlib import Path
+from utils.dataloaders import letterbox
+from utils.general import (check_img_size, non_max_suppression, scale_boxes,
+                           xywh2xyxy)
+from utils.torch_utils import select_device
 
 root_source_path = str(Path(__file__).parents[3])
 app_source_path = str(Path(__file__).parents[1])
@@ -26,7 +31,7 @@
 pretrained_weights = os.environ['modal.state.selectedModel'].lower()
 custom_weights = os.environ['modal.state.weightsPath']
 
-pretrained_weights_url = f"https://github.com/ultralytics/yolov5/releases/download/v5.0/{pretrained_weights}.pt"
+pretrained_weights_url = f"https://github.com/ultralytics/yolov5/releases/download/v7.0/{pretrained_weights}.pt"
 
 class YOLOv5Model(sly.nn.inference.ObjectDetection):
     def load_on_device(
@@ -134,7 +139,7 @@ def predict(
         predictions = []
         for det in output:
             if det is not None and len(det) > 0:
-                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()
+                det[:, :4] = scale_boxes(img.shape[2:], det[:, :4], img0.shape).round()
 
                 for *xyxy, conf, cls in reversed(det):
                     bbox = [int(xyxy[1]), int(xyxy[0]), int(xyxy[3]), int(xyxy[2])]
@@ -172,7 +177,7 @@ def predict_raw(
         conf, j = inf_out[:, 5:].max(1, keepdim=True) # best class
         det = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
 
-        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()
+        det[:, :4] = scale_boxes(img.shape[2:], det[:, :4], img0.shape).round()
 
         for *xyxy, conf, cls in reversed(det):
             bbox = [int(xyxy[1]), int(xyxy[0]), int(xyxy[3]), int(xyxy[2])]
diff --git a/test_yolov5.py b/test_yolov5.py
index 19eb6eba3039..337c7eb164b5 100644
--- a/test_yolov5.py
+++ b/test_yolov5.py
@@ -1,243 +1,293 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Validate a trained YOLOv5 detection model on a detection dataset
+
+Usage:
+    $ python val.py --weights yolov5s.pt --data coco128.yaml --img 640
+
+Usage - formats:
+    $ python val.py --weights yolov5s.pt                 # PyTorch
+                              yolov5s.torchscript        # TorchScript
+                              yolov5s.onnx               # ONNX Runtime or OpenCV DNN with --dnn
+                              yolov5s_openvino_model     # OpenVINO
+                              yolov5s.engine             # TensorRT
+                              yolov5s.mlmodel            # CoreML (macOS-only)
+                              yolov5s_saved_model        # TensorFlow SavedModel
+                              yolov5s.pb                 # TensorFlow GraphDef
+                              yolov5s.tflite             # TensorFlow Lite
+                              yolov5s_edgetpu.tflite     # TensorFlow Edge TPU
+                              yolov5s_paddle_model       # PaddlePaddle
+"""
+
 import argparse
 import json
 import os
+import subprocess
+import sys
 from pathlib import Path
-from threading import Thread
 
 import numpy as np
 import torch
-import yaml
 from tqdm import tqdm
 
-from models.experimental import attempt_load
-from utils.datasets import create_dataloader
-from utils.general import coco80_to_coco91_class, check_dataset, check_file, check_img_size, check_requirements, \
-    box_iou, non_max_suppression, scale_coords, xyxy2xywh, xywh2xyxy, set_logging, increment_path, colorstr
-from utils.metrics import ap_per_class, ConfusionMatrix
-from utils.plots import plot_images, output_to_target, plot_study_txt
-from utils.torch_utils import select_device, time_synchronized
-
+FILE = Path(__file__).resolve()
+ROOT = FILE.parents[0]  # YOLOv5 root directory
+if str(ROOT) not in sys.path:
+    sys.path.append(str(ROOT))  # add ROOT to PATH
+ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
 from sly_train_utils import upload_pred_vis
-import supervisely as sly
-
-
-def test(data,
-         weights=None,
-         batch_size=32,
-         imgsz=640,
-         conf_thres=0.001,
-         iou_thres=0.6,  # for NMS
-         save_json=False,
-         single_cls=False,
-         augment=False,
-         verbose=False,
-         model=None,
-         dataloader=None,
-         save_dir=Path(''),  # for saving images
-         save_txt=False,  # for auto-labelling
-         save_hybrid=False,  # for hybrid auto-labelling
-         save_conf=False,  # save auto-label confidences
-         plots=True,
-         wandb_logger=None,
-         compute_loss=None,
-         half_precision=True,
-         is_coco=False,
-         opt=None,
-         opt_sly=False):
 
+import supervisely as sly
+from models.common import DetectMultiBackend
+from utils.callbacks import Callbacks
+from utils.dataloaders import create_dataloader
+from utils.general import (LOGGER, TQDM_BAR_FORMAT, Profile, check_dataset,
+                           check_img_size, check_requirements, check_yaml,
+                           coco80_to_coco91_class, colorstr, increment_path,
+                           non_max_suppression, print_args, scale_boxes,
+                           xywh2xyxy, xyxy2xywh)
+from utils.metrics import ConfusionMatrix, ap_per_class, box_iou
+from utils.plots import output_to_target, plot_images, plot_val_study
+from utils.torch_utils import select_device, smart_inference_mode
+
+
+def save_one_txt(predn, save_conf, shape, file):
+    # Save one txt result
+    gn = torch.tensor(shape)[[1, 0, 1, 0]]  # normalization gain whwh
+    for *xyxy, conf, cls in predn.tolist():
+        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
+        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
+        with open(file, 'a') as f:
+            f.write(('%g ' * len(line)).rstrip() % line + '\n')
+
+
+def save_one_json(predn, jdict, path, class_map):
+    # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
+    image_id = int(path.stem) if path.stem.isnumeric() else path.stem
+    box = xyxy2xywh(predn[:, :4])  # xywh
+    box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
+    for p, b in zip(predn.tolist(), box.tolist()):
+        jdict.append({
+            'image_id': image_id,
+            'category_id': class_map[int(p[5])],
+            'bbox': [round(x, 3) for x in b],
+            'score': round(p[4], 5)})
+
+
+def process_batch(detections, labels, iouv):
+    """
+    Return correct prediction matrix
+    Arguments:
+        detections (array[N, 6]), x1, y1, x2, y2, conf, class
+        labels (array[M, 5]), class, x1, y1, x2, y2
+    Returns:
+        correct (array[N, 10]), for 10 IoU levels
+    """
+    correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool)
+    iou = box_iou(labels[:, 1:], detections[:, :4])
+    correct_class = labels[:, 0:1] == detections[:, 5]
+    for i in range(len(iouv)):
+        x = torch.where((iou >= iouv[i]) & correct_class)  # IoU > threshold and classes match
+        if x[0].shape[0]:
+            matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()  # [label, detect, iou]
+            if x[0].shape[0] > 1:
+                matches = matches[matches[:, 2].argsort()[::-1]]
+                matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
+                # matches = matches[matches[:, 2].argsort()[::-1]]
+                matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
+            correct[matches[:, 1].astype(int), i] = True
+    return torch.tensor(correct, dtype=torch.bool, device=iouv.device)
+
+
+@smart_inference_mode()
+def run(
+        data,
+        weights=None,  # model.pt path(s)
+        batch_size=32,  # batch size
+        imgsz=640,  # inference size (pixels)
+        conf_thres=0.001,  # confidence threshold
+        iou_thres=0.6,  # NMS IoU threshold
+        max_det=300,  # maximum detections per image
+        task='val',  # train, val, test, speed or study
+        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
+        workers=8,  # max dataloader workers (per RANK in DDP mode)
+        single_cls=False,  # treat as single-class dataset
+        augment=False,  # augmented inference
+        verbose=False,  # verbose output
+        save_txt=False,  # save results to *.txt
+        save_hybrid=False,  # save label+prediction hybrid results to *.txt
+        save_conf=False,  # save confidences in --save-txt labels
+        save_json=False,  # save a COCO-JSON results file
+        project=ROOT / 'runs/val',  # save to project/name
+        name='exp',  # save to project/name
+        exist_ok=False,  # existing project/name ok, do not increment
+        half=True,  # use FP16 half-precision inference
+        dnn=False,  # use OpenCV DNN for ONNX inference
+        model=None,
+        dataloader=None,
+        save_dir=Path(''),
+        plots=True,
+        callbacks=Callbacks(),
+        compute_loss=None,
+        opt_sly=False,
+):
     # Initialize/load model and set device
     training = model is not None
     if training:  # called by train.py
-        device = next(model.parameters()).device  # get model device
-
+        device, pt, jit, engine = next(model.parameters()).device, True, False, False  # get model device, PyTorch model
+        half &= device.type != 'cpu'  # half precision only supported on CUDA
+        model.half() if half else model.float()
     else:  # called directly
-        set_logging()
-        device = select_device(opt.device, batch_size=batch_size)
+        device = select_device(device, batch_size=batch_size)
 
         # Directories
-        save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)  # increment run
+        save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
         (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir
 
         # Load model
-        model = attempt_load(weights, map_location=device)  # load FP32 model
-        gs = max(int(model.stride.max()), 32)  # grid size (max stride)
-        imgsz = check_img_size(imgsz, s=gs)  # check img_size
-
-        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
-        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
-        #     model = nn.DataParallel(model)
-
-    # Half
-    half = device.type != 'cpu' and half_precision  # half precision only supported on CUDA
-    if half:
-        model.half()
+        model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
+        stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine
+        imgsz = check_img_size(imgsz, s=stride)  # check image size
+        half = model.fp16  # FP16 supported on limited backends with CUDA
+        if engine:
+            batch_size = model.batch_size
+        else:
+            device = model.device
+            if not (pt or jit):
+                batch_size = 1  # export.py models default to batch-size 1
+                LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models')
+
+        # Data
+        data = check_dataset(data)  # check
 
     # Configure
     model.eval()
-    if isinstance(data, str):
-        is_coco = data.endswith('coco.yaml')
-        with open(data) as f:
-            data = yaml.safe_load(f)
-    check_dataset(data)  # check
+    cuda = device.type != 'cpu'
+    is_coco = isinstance(data.get('val'), str) and data['val'].endswith(f'coco{os.sep}val2017.txt')  # COCO dataset
     nc = 1 if single_cls else int(data['nc'])  # number of classes
-    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
+    iouv = torch.linspace(0.5, 0.95, 10, device=device)  # iou vector for mAP@0.5:0.95
     niou = iouv.numel()
 
-    # Logging
-    log_imgs = 0
-    if wandb_logger and wandb_logger.wandb:
-        log_imgs = min(wandb_logger.log_imgs, 100)
     # Dataloader
     if not training:
-        if device.type != 'cpu':
-            model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
-        task = opt.task if opt.task in ('train', 'val', 'test') else 'val'  # path to train/val/test images
-        dataloader = create_dataloader(data[task], imgsz, batch_size, gs, opt, pad=0.5, rect=True,
+        if pt and not single_cls:  # check --weights are trained on --data
+            ncm = model.model.nc
+            assert ncm == nc, f'{weights} ({ncm} classes) trained on different --data than what you passed ({nc} ' \
+                              f'classes). Pass correct combination of --weights and --data that are trained together.'
+        model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz))  # warmup
+        pad, rect = (0.0, False) if task == 'speed' else (0.5, pt)  # square inference for benchmarks
+        task = task if task in ('train', 'val', 'test') else 'val'  # path to train/val/test images
+        dataloader = create_dataloader(data[task],
+                                       imgsz,
+                                       batch_size,
+                                       stride,
+                                       single_cls,
+                                       pad=pad,
+                                       rect=rect,
+                                       workers=workers,
                                        prefix=colorstr(f'{task}: '))[0]
 
     seen = 0
     confusion_matrix = ConfusionMatrix(nc=nc)
-    names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)}
-    coco91class = coco80_to_coco91_class()
-    #s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Labels', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
-    p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
+    names = model.names if hasattr(model, 'names') else model.module.names  # get class names
+    if isinstance(names, (list, tuple)):  # old format
+        names = dict(enumerate(names))
+    class_map = coco80_to_coco91_class() if is_coco else list(range(1000))
+    s = ('%22s' + '%11s' * 6) % ('Class', 'Images', 'Instances', 'P', 'R', 'mAP50', 'mAP50-95')
+    tp, fp, p, r, f1, mp, mr, map50, ap50, map = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
+    dt = Profile(), Profile(), Profile()  # profiling times
     loss = torch.zeros(3, device=device)
-    jdict, stats, ap, ap_class, wandb_images = [], [], [], [], []
-    #for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
-    for batch_i, (img, targets, paths, shapes) in enumerate(dataloader):
-        img = img.to(device, non_blocking=True)
-        img = img.half() if half else img.float()  # uint8 to fp16/32
-        img /= 255.0  # 0 - 255 to 0.0 - 1.0
-        targets = targets.to(device)
-        nb, _, height, width = img.shape  # batch size, channels, height, width
-
-        with torch.no_grad():
-            # Run model
-            t = time_synchronized()
-            inf_out, train_out = model(img, augment=augment)  # inference and training outputs
-            t0 += time_synchronized() - t
-
-            # Compute loss
-            if compute_loss:
-                loss += compute_loss([x.float() for x in train_out], targets)[1][:3]  # box, obj, cls
-
-            # Run NMS
-            targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device)  # to pixels
-            lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
-            t = time_synchronized()
-            output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, labels=lb, multi_label=True, agnostic=single_cls)
-            t1 += time_synchronized() - t
-
-        # Statistics per image
-        for si, pred in enumerate(output):
+    jdict, stats, ap, ap_class = [], [], [], []
+    callbacks.run('on_val_start')
+    pbar = tqdm(dataloader, desc=s, bar_format=TQDM_BAR_FORMAT)  # progress bar
+    for batch_i, (im, targets, paths, shapes) in enumerate(pbar):
+        callbacks.run('on_val_batch_start')
+        with dt[0]:
+            if cuda:
+                im = im.to(device, non_blocking=True)
+                targets = targets.to(device)
+            im = im.half() if half else im.float()  # uint8 to fp16/32
+            im /= 255  # 0 - 255 to 0.0 - 1.0
+            nb, _, height, width = im.shape  # batch size, channels, height, width
+
+        # Inference
+        with dt[1]:
+            preds, train_out = model(im) if compute_loss else (model(im, augment=augment), None)
+
+        # Loss
+        if compute_loss:
+            loss += compute_loss(train_out, targets)[1]  # box, obj, cls
+
+        # NMS
+        targets[:, 2:] *= torch.tensor((width, height, width, height), device=device)  # to pixels
+        lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
+        with dt[2]:
+            preds = non_max_suppression(preds,
+                                        conf_thres,
+                                        iou_thres,
+                                        labels=lb,
+                                        multi_label=True,
+                                        agnostic=single_cls,
+                                        max_det=max_det)
+
+        # Metrics
+        for si, pred in enumerate(preds):
             labels = targets[targets[:, 0] == si, 1:]
-            nl = len(labels)
-            tcls = labels[:, 0].tolist() if nl else []  # target class
-            path = Path(paths[si])
+            nl, npr = labels.shape[0], pred.shape[0]  # number of labels, predictions
+            path, shape = Path(paths[si]), shapes[si][0]
+            correct = torch.zeros(npr, niou, dtype=torch.bool, device=device)  # init
             seen += 1
 
-            if len(pred) == 0:
+            if npr == 0:
                 if nl:
-                    stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
+                    stats.append((correct, *torch.zeros((2, 0), device=device), labels[:, 0]))
+                    if plots:
+                        confusion_matrix.process_batch(detections=None, labels=labels[:, 0])
                 continue
 
             # Predictions
             if single_cls:
                 pred[:, 5] = 0
             predn = pred.clone()
-            scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1])  # native-space pred
+            scale_boxes(im[si].shape[1:], predn[:, :4], shape, shapes[si][1])  # native-space pred
 
-            # Append to text file
-            if save_txt:
-                gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]]  # normalization gain whwh
-                for *xyxy, conf, cls in predn.tolist():
-                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
-                    line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
-                    with open(save_dir / 'labels' / (path.stem + '.txt'), 'a') as f:
-                        f.write(('%g ' * len(line)).rstrip() % line + '\n')
-
-            # W&B logging - Media Panel Plots
-            if len(wandb_images) < log_imgs and wandb_logger.current_epoch > 0:  # Check for test operation
-                if wandb_logger.current_epoch % wandb_logger.bbox_interval == 0:
-                    box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
-                                 "class_id": int(cls),
-                                 "box_caption": "%s %.3f" % (names[cls], conf),
-                                 "scores": {"class_score": conf},
-                                 "domain": "pixel"} for *xyxy, conf, cls in pred.tolist()]
-                    boxes = {"predictions": {"box_data": box_data, "class_labels": names}}  # inference-space
-                    wandb_images.append(wandb_logger.wandb.Image(img[si], boxes=boxes, caption=path.name))
-            wandb_logger.log_training_progress(predn, path, names) if wandb_logger and wandb_logger.wandb_run else None
-
-            # Append to pycocotools JSON dictionary
-            if save_json:
-                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
-                image_id = int(path.stem) if path.stem.isnumeric() else path.stem
-                box = xyxy2xywh(predn[:, :4])  # xywh
-                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
-                for p, b in zip(pred.tolist(), box.tolist()):
-                    jdict.append({'image_id': image_id,
-                                  'category_id': coco91class[int(p[5])] if is_coco else int(p[5]),
-                                  'bbox': [round(x, 3) for x in b],
-                                  'score': round(p[4], 5)})
-
-            # Assign all predictions as incorrect
-            correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
+            # Evaluate
             if nl:
-                detected = []  # target indices
-                tcls_tensor = labels[:, 0]
-
-                # target boxes
-                tbox = xywh2xyxy(labels[:, 1:5])
-                scale_coords(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1])  # native-space labels
+                tbox = xywh2xyxy(labels[:, 1:5])  # target boxes
+                scale_boxes(im[si].shape[1:], tbox, shape, shapes[si][1])  # native-space labels
+                labelsn = torch.cat((labels[:, 0:1], tbox), 1)  # native-space labels
+                correct = process_batch(predn, labelsn, iouv)
                 if plots:
-                    confusion_matrix.process_batch(predn, torch.cat((labels[:, 0:1], tbox), 1))
-
-                # Per target class
-                for cls in torch.unique(tcls_tensor):
-                    ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)  # prediction indices
-                    pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1)  # target indices
-
-                    # Search for detections
-                    if pi.shape[0]:
-                        # Prediction to target ious
-                        ious, i = box_iou(predn[pi, :4], tbox[ti]).max(1)  # best ious, indices
-
-                        # Append detections
-                        detected_set = set()
-                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
-                            d = ti[i[j]]  # detected target
-                            if d.item() not in detected_set:
-                                detected_set.add(d.item())
-                                detected.append(d)
-                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
-                                if len(detected) == nl:  # all targets already located in image
-                                    break
-
-            # Append statistics (correct, conf, pcls, tcls)
-            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
+                    confusion_matrix.process_batch(predn, labelsn)
+            stats.append((correct, pred[:, 4], pred[:, 5], labels[:, 0]))  # (correct, conf, pcls, tcls)
+
+            # Save/log
+            if save_txt:
+                save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt')
+            if save_json:
+                save_one_json(predn, jdict, path, class_map)  # append to COCO-JSON dictionary
+            callbacks.run('on_val_image_end', pred, predn, path, names, im[si])
 
         # Plot images
         if plots and batch_i < 3:
-            f = save_dir / f'test_batch{batch_i}_labels.jpg'  # labels
-            plot_images(img, targets, paths, f, names)
-            # Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start()
-            f = save_dir / f'test_batch{batch_i}_pred.jpg'  # predictions
-            plot_images(img, output_to_target(output), paths, f, names)
-            # Thread(target=plot_images, args=(img, output_to_target(output), paths, f, names), daemon=True).start()
-
-    # Compute statistics
-    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
+            plot_images(im, targets, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names)  # labels
+            plot_images(im, output_to_target(preds), paths, save_dir / f'val_batch{batch_i}_pred.jpg', names)  # pred
+
+        callbacks.run('on_val_batch_end', batch_i, im, targets, paths, shapes, preds)
+
+    # Compute metrics
+    stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)]  # to numpy
     if len(stats) and stats[0].any():
-        p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names)
+        tp, fp, p, r, f1, ap, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names)
         ap50, ap = ap[:, 0], ap.mean(1)  # AP@0.5, AP@0.5:0.95
         mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
-        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
-    else:
-        nt = torch.zeros(1)
+    nt = np.bincount(stats[3].astype(int), minlength=nc)  # number of targets per class
 
     # Print results
-    pf = '%20s' + '%12i' * 2 + '%12.3g' * 4  # print format
-    #print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))
+    pf = '%22s' + '%11i' * 2 + '%11.3g' * 4  # print format
+    LOGGER.info(pf % ('all', seen, nt.sum(), mp, mr, map50, map))
+    if nt.sum() == 0:
+        LOGGER.warning(f'WARNING ⚠️ no labels found in {task} set, can not compute metrics without labels')
 
     sly.logger.info("Test", extra={'Class': '%20s' % 'all',
                                    'Images': '%12i' % seen,
@@ -250,34 +300,32 @@ def test(data,
     # Print results per class
     if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
         for i, c in enumerate(ap_class):
-            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))
+            LOGGER.info(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))
 
     # Print speeds
-    t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size)  # tuple
+    t = tuple(x.t / seen * 1E3 for x in dt)  # speeds per image
     if not training:
-        print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)
+        shape = (batch_size, 3, imgsz, imgsz)
+        LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t)
 
     # Plots
     if plots:
         confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
-        if wandb_logger and wandb_logger.wandb:
-            val_batches = [wandb_logger.wandb.Image(str(f), caption=f.name) for f in sorted(save_dir.glob('test*.jpg'))]
-            wandb_logger.log({"Validation": val_batches})
+        callbacks.run('on_val_end', nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix)
         if opt_sly:
             upload_pred_vis()
-    if wandb_images:
-        wandb_logger.log({"Bounding Box Debugger/Images": wandb_images})
 
     # Save JSON
     if save_json and len(jdict):
         w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else ''  # weights
-        anno_json = '../coco/annotations/instances_val2017.json'  # annotations json
-        pred_json = str(save_dir / f"{w}_predictions.json")  # predictions json
-        print('\nEvaluating pycocotools mAP... saving %s...' % pred_json)
+        anno_json = str(Path('../datasets/coco/annotations/instances_val2017.json'))  # annotations
+        pred_json = str(save_dir / f'{w}_predictions.json')  # predictions
+        LOGGER.info(f'\nEvaluating pycocotools mAP... saving {pred_json}...')
         with open(pred_json, 'w') as f:
             json.dump(jdict, f)
 
         try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
+            check_requirements('pycocotools>=2.0.6')
             from pycocotools.coco import COCO
             from pycocotools.cocoeval import COCOeval
 
@@ -285,83 +333,92 @@ def test(data,
             pred = anno.loadRes(pred_json)  # init predictions api
             eval = COCOeval(anno, pred, 'bbox')
             if is_coco:
-                eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files]  # image IDs to evaluate
+                eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files]  # image IDs to evaluate
             eval.evaluate()
             eval.accumulate()
             eval.summarize()
             map, map50 = eval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
         except Exception as e:
-            print(f'pycocotools unable to run: {e}')
+            LOGGER.info(f'pycocotools unable to run: {e}')
 
     # Return results
     model.float()  # for training
     if not training:
         s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
-        print(f"Results saved to {save_dir}{s}")
+        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
     maps = np.zeros(nc) + map
     for i, c in enumerate(ap_class):
         maps[c] = ap[i]
     return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
 
 
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(prog='test.py')
-    parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')
-    parser.add_argument('--data', type=str, default='data/coco128.yaml', help='*.data path')
-    parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch')
-    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
-    parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold')
-    parser.add_argument('--iou-thres', type=float, default=0.6, help='IOU threshold for NMS')
+def parse_opt():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
+    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)')
+    parser.add_argument('--batch-size', type=int, default=32, help='batch size')
+    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')
+    parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold')
+    parser.add_argument('--iou-thres', type=float, default=0.6, help='NMS IoU threshold')
+    parser.add_argument('--max-det', type=int, default=300, help='maximum detections per image')
     parser.add_argument('--task', default='val', help='train, val, test, speed or study')
     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
+    parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
     parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset')
     parser.add_argument('--augment', action='store_true', help='augmented inference')
     parser.add_argument('--verbose', action='store_true', help='report mAP by class')
     parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
     parser.add_argument('--save-hybrid', action='store_true', help='save label+prediction hybrid results to *.txt')
     parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
-    parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
-    parser.add_argument('--project', default='runs/test', help='save to project/name')
+    parser.add_argument('--save-json', action='store_true', help='save a COCO-JSON results file')
+    parser.add_argument('--project', default=ROOT / 'runs/val', help='save to project/name')
     parser.add_argument('--name', default='exp', help='save to project/name')
     parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
+    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
+    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
     opt = parser.parse_args()
+    opt.data = check_yaml(opt.data)  # check YAML
     opt.save_json |= opt.data.endswith('coco.yaml')
-    opt.data = check_file(opt.data)  # check file
-    print(opt)
-    check_requirements()
+    opt.save_txt |= opt.save_hybrid
+    print_args(vars(opt))
+    return opt
+
+
+def main(opt):
+    check_requirements(exclude=('tensorboard', 'thop'))
 
     if opt.task in ('train', 'val', 'test'):  # run normally
-        test(opt.data,
-             opt.weights,
-             opt.batch_size,
-             opt.img_size,
-             opt.conf_thres,
-             opt.iou_thres,
-             opt.save_json,
-             opt.single_cls,
-             opt.augment,
-             opt.verbose,
-             save_txt=opt.save_txt | opt.save_hybrid,
-             save_hybrid=opt.save_hybrid,
-             save_conf=opt.save_conf,
-             opt=opt
-             )
-
-    elif opt.task == 'speed':  # speed benchmarks
-        for w in opt.weights:
-            test(opt.data, w, opt.batch_size, opt.img_size, 0.25, 0.45, save_json=False, plots=False, opt=opt)
-
-    elif opt.task == 'study':  # run over a range of settings and save/plot
-        # python test.py --task study --data coco.yaml --iou 0.7 --weights yolov5s.pt yolov5m.pt yolov5l.pt yolov5x.pt
-        x = list(range(256, 1536 + 128, 128))  # x axis (image sizes)
-        for w in opt.weights:
-            f = f'study_{Path(opt.data).stem}_{Path(w).stem}.txt'  # filename to save to
-            y = []  # y axis
-            for i in x:  # img-size
-                print(f'\nRunning {f} point {i}...')
-                r, _, t = test(opt.data, w, opt.batch_size, i, opt.conf_thres, opt.iou_thres, opt.save_json,
-                               plots=False, opt=opt)
-                y.append(r + t)  # results and times
-            np.savetxt(f, y, fmt='%10.4g')  # save
-        os.system('zip -r study.zip study_*.txt')
-        plot_study_txt(x=x)  # plot
+        if opt.conf_thres > 0.001:  # https://github.com/ultralytics/yolov5/issues/1466
+            LOGGER.info(f'WARNING ⚠️ confidence threshold {opt.conf_thres} > 0.001 produces invalid results')
+        if opt.save_hybrid:
+            LOGGER.info('WARNING ⚠️ --save-hybrid will return high mAP from hybrid labels, not from predictions alone')
+        run(**vars(opt))
+
+    else:
+        weights = opt.weights if isinstance(opt.weights, list) else [opt.weights]
+        opt.half = torch.cuda.is_available() and opt.device != 'cpu'  # FP16 for fastest results
+        if opt.task == 'speed':  # speed benchmarks
+            # python val.py --task speed --data coco.yaml --batch 1 --weights yolov5n.pt yolov5s.pt...
+            opt.conf_thres, opt.iou_thres, opt.save_json = 0.25, 0.45, False
+            for opt.weights in weights:
+                run(**vars(opt), plots=False)
+
+        elif opt.task == 'study':  # speed vs mAP benchmarks
+            # python val.py --task study --data coco.yaml --iou 0.7 --weights yolov5n.pt yolov5s.pt...
+            for opt.weights in weights:
+                f = f'study_{Path(opt.data).stem}_{Path(opt.weights).stem}.txt'  # filename to save to
+                x, y = list(range(256, 1536 + 128, 128)), []  # x axis (image sizes), y axis
+                for opt.imgsz in x:  # img-size
+                    LOGGER.info(f'\nRunning {f} --imgsz {opt.imgsz}...')
+                    r, _, t = run(**vars(opt), plots=False)
+                    y.append(r + t)  # results and times
+                np.savetxt(f, y, fmt='%10.4g')  # save
+            subprocess.run(['zip', '-r', 'study.zip', 'study_*.txt'])
+            plot_val_study(x=x)  # plot
+        else:
+            raise NotImplementedError(f'--task {opt.task} not in ("train", "val", "test", "speed", "study")')
+
+
+if __name__ == '__main__':
+    opt = parse_opt()
+    main(opt)
diff --git a/train.py b/train.py
index ab378ce22160..28b777b3062b 100644
--- a/train.py
+++ b/train.py
@@ -1,649 +1,615 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Train a YOLOv5 model on a custom dataset.
+Models and datasets download automatically from the latest YOLOv5 release.
+
+Usage - Single-GPU training:
+    $ python train.py --data coco128.yaml --weights yolov5s.pt --img 640  # from pretrained (recommended)
+    $ python train.py --data coco128.yaml --weights '' --cfg yolov5s.yaml --img 640  # from scratch
+
+Usage - Multi-GPU DDP training:
+    $ python -m torch.distributed.run --nproc_per_node 4 --master_port 1 train.py --data coco128.yaml --weights yolov5s.pt --img 640 --device 0,1,2,3
+
+Models:     https://github.com/ultralytics/yolov5/tree/master/models
+Datasets:   https://github.com/ultralytics/yolov5/tree/master/data
+Tutorial:   https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data
+"""
+
 import argparse
-import logging
 import math
 import os
 import random
+import subprocess
+import sys
 import time
 from copy import deepcopy
+from datetime import datetime
 from pathlib import Path
-from threading import Thread
 
 import numpy as np
+import torch
 import torch.distributed as dist
 import torch.nn as nn
-import torch.nn.functional as F
-import torch.optim as optim
-import torch.optim.lr_scheduler as lr_scheduler
-import torch.utils.data
 import yaml
-from torch.cuda import amp
-from torch.nn.parallel import DistributedDataParallel as DDP
-from torch.utils.tensorboard import SummaryWriter
+from torch.optim import lr_scheduler
 from tqdm import tqdm
 
-#import test  # import test.py to get mAP after each epoch
-import test_yolov5 as test
+FILE = Path(__file__).resolve()
+ROOT = FILE.parents[0]  # YOLOv5 root directory
+if str(ROOT) not in sys.path:
+    sys.path.append(str(ROOT))  # add ROOT to PATH
+ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
 
-from models.experimental import attempt_load
-from models.yolo import Model
-from utils.autoanchor import check_anchors
-from utils.datasets import create_dataloader
-from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \
-    fitness, strip_optimizer, get_latest_run, check_dataset, check_file, check_git_status, check_img_size, \
-    check_requirements, print_mutation, set_logging, one_cycle, colorstr
-from utils.google_utils import attempt_download
-from utils.loss import ComputeLoss
-from utils.plots import plot_images, plot_labels, plot_results, plot_evolution
-from utils.torch_utils import ModelEMA, select_device, intersect_dicts, torch_distributed_zero_first, is_parallel
-from utils.wandb_logging.wandb_utils import WandbLogger, check_wandb_resume
-
-from sly_train_utils import send_epoch_log, upload_label_vis, upload_train_data_vis
 from sly_metrics import send_metrics
+from sly_train_utils import (send_epoch_log, upload_label_vis,
+                             upload_train_data_vis)
 
-#logger = logging.getLogger(__name__)
 import supervisely as sly
+import test_yolov5 as test
+from models.experimental import attempt_load
+from models.yolo import Model
 from supervisely import logger
-
-
-def train(hyp, opt, device, tb_writer=None):
+from utils.autoanchor import check_anchors
+from utils.autobatch import check_train_batch_size
+from utils.callbacks import Callbacks
+from utils.dataloaders import create_dataloader
+from utils.downloads import attempt_download, is_url
+from utils.general import (LOGGER, TQDM_BAR_FORMAT, check_amp, check_dataset,
+                           check_file, check_git_info, check_git_status,
+                           check_img_size, check_requirements, check_suffix,
+                           check_yaml, colorstr, get_latest_run,
+                           increment_path, init_seeds, intersect_dicts,
+                           labels_to_class_weights, labels_to_image_weights,
+                           methods, one_cycle, print_args, print_mutation,
+                           strip_optimizer, yaml_save)
+from utils.loggers import Loggers
+from utils.loggers.comet.comet_utils import check_comet_resume
+from utils.loss import ComputeLoss
+from utils.metrics import fitness
+from utils.plots import plot_evolve
+from utils.torch_utils import (EarlyStopping, ModelEMA, de_parallel,
+                               select_device, smart_DDP, smart_optimizer,
+                               smart_resume, torch_distributed_zero_first)
+
+LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1))  # https://pytorch.org/docs/stable/elastic/run.html
+RANK = int(os.getenv('RANK', -1))
+WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))
+GIT_INFO = check_git_info()
+
+
+def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictionary
+    save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze = \
+        Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
+        opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze
+    callbacks.run('on_pretrain_routine_start')
     train_batches_uploaded = False
 
-    logger.info('hyperparameters', extra=hyp)
-    save_dir, epochs, batch_size, total_batch_size, weights, rank = \
-        Path(opt.save_dir), opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank
-
     # Directories
-    wdir = save_dir / 'weights'
-    wdir.mkdir(parents=True, exist_ok=True)  # make dir
-    last = wdir / 'last.pt'
-    best = wdir / 'best.pt'
-    results_file = save_dir / 'results.txt'
+    w = save_dir / 'weights'  # weights dir
+    (w.parent if evolve else w).mkdir(parents=True, exist_ok=True)  # make dir
+    last, best = w / 'last.pt', w / 'best.pt'
 
-    # Save run settings
-    with open(save_dir / 'hyp.yaml', 'w') as f:
-        yaml.safe_dump(hyp, f, sort_keys=False)
-    with open(save_dir / 'opt.yaml', 'w') as f:
-        yaml.safe_dump(vars(opt), f, sort_keys=False)
+    # Hyperparameters
+    if isinstance(hyp, str):
+        with open(hyp, errors='ignore') as f:
+            hyp = yaml.safe_load(f)  # load hyps dict
+    LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))
+    opt.hyp = hyp.copy()  # for saving hyps to checkpoints
 
-    # Configure
-    plots = not opt.evolve  # create plots
+    # Save run settings
+    if not evolve:
+        yaml_save(save_dir / 'hyp.yaml', hyp)
+        yaml_save(save_dir / 'opt.yaml', vars(opt))
+
+    # Loggers
+    data_dict = None
+    if RANK in {-1, 0}:
+        loggers = Loggers(save_dir, weights, opt, hyp, LOGGER)  # loggers instance
+
+        # Register actions
+        for k in methods(loggers):
+            callbacks.register_action(k, callback=getattr(loggers, k))
+
+        # Process custom dataset artifact link
+        data_dict = loggers.remote_dataset
+        if resume:  # If resuming runs from remote artifact
+            weights, epochs, hyp, batch_size = opt.weights, opt.epochs, opt.hyp, opt.batch_size
+
+    # Config
+    plots = not evolve and not opt.noplots  # create plots
     cuda = device.type != 'cpu'
-    init_seeds(2 + rank)
-    with open(opt.data) as f:
-        data_dict = yaml.safe_load(f)  # data dict
-    is_coco = opt.data.endswith('coco.yaml')
-
-    # Logging- Doing this before checking the dataset. Might update data_dict
-    loggers = {'wandb': None}  # loggers dict
-    if rank in [-1, 0]:
-        opt.hyp = hyp  # add hyperparameters
-        run_id = torch.load(weights).get('wandb_id') if weights.endswith('.pt') and os.path.isfile(weights) else None
-        wandb_logger = WandbLogger(opt, save_dir.stem, run_id, data_dict)
-        loggers['wandb'] = wandb_logger.wandb
-        data_dict = wandb_logger.data_dict
-        if wandb_logger.wandb:
-            weights, epochs, hyp = opt.weights, opt.epochs, opt.hyp  # WandbLogger might update weights, epochs if resuming
-
-    nc = 1 if opt.single_cls else int(data_dict['nc'])  # number of classes
-    names = ['item'] if opt.single_cls and len(data_dict['names']) != 1 else data_dict['names']  # class names
-    assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data)  # check
-    colors = data_dict.get('colors', None)
+    init_seeds(opt.seed + 1 + RANK, deterministic=True)
+    with torch_distributed_zero_first(LOCAL_RANK):
+        data_dict = data_dict or check_dataset(data)  # check if None
+    train_path, val_path = data_dict['train'], data_dict['val']
+    nc = 1 if single_cls else int(data_dict['nc'])  # number of classes
+    names = {0: 'item'} if single_cls and len(data_dict['names']) != 1 else data_dict['names']  # class names
+    is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt')  # COCO dataset
 
     # Model
+    check_suffix(weights, '.pt')  # check weights
     pretrained = weights.endswith('.pt')
     if pretrained:
-        with torch_distributed_zero_first(rank):
-            attempt_download(weights)  # download if not found locally
-        ckpt = torch.load(weights, map_location=device)  # load checkpoint
-        model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
-        exclude = ['anchor'] if (opt.cfg or hyp.get('anchors')) and not opt.resume else []  # exclude keys
-        state_dict = ckpt['model'].float().state_dict()  # to FP32
-        state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude)  # intersect
-        model.load_state_dict(state_dict, strict=False)  # load
-        logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights))  # report
+        with torch_distributed_zero_first(LOCAL_RANK):
+            weights = attempt_download(weights)  # download if not found locally
+        ckpt = torch.load(weights, map_location='cpu')  # load checkpoint to CPU to avoid CUDA memory leak
+        model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
+        exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else []  # exclude keys
+        csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
+        csd = intersect_dicts(csd, model.state_dict(), exclude=exclude)  # intersect
+        model.load_state_dict(csd, strict=False)  # load
+        LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}')  # report
     else:
-        model = Model(opt.cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
-    with torch_distributed_zero_first(rank):
-        check_dataset(data_dict)  # check
-    train_path = data_dict['train']
-    test_path = data_dict['val']
+        model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
+    amp = check_amp(model)  # check AMP
 
     # Freeze
-    freeze = []  # parameter names to freeze (full or partial)
+    freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))]  # layers to freeze
     for k, v in model.named_parameters():
         v.requires_grad = True  # train all layers
+        # v.register_hook(lambda x: torch.nan_to_num(x))  # NaN to 0 (commented for erratic training results)
         if any(x in k for x in freeze):
-            print('freezing %s' % k)
+            LOGGER.info(f'freezing {k}')
             v.requires_grad = False
 
+    # Image size
+    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
+    imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2)  # verify imgsz is gs-multiple
+
+    # Batch size
+    if RANK == -1 and batch_size == -1:  # single-GPU only, estimate best batch size
+        batch_size = check_train_batch_size(model, imgsz, amp)
+        loggers.on_params_update({'batch_size': batch_size})
+
     # Optimizer
     nbs = 64  # nominal batch size
-    accumulate = max(round(nbs / total_batch_size), 1)  # accumulate loss before optimizing
-    hyp['weight_decay'] *= total_batch_size * accumulate / nbs  # scale weight_decay
-    logger.info(f"Scaled weight_decay = {hyp['weight_decay']}")
-
-    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
-    for k, v in model.named_modules():
-        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
-            pg2.append(v.bias)  # biases
-        if isinstance(v, nn.BatchNorm2d):
-            pg0.append(v.weight)  # no decay
-        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
-            pg1.append(v.weight)  # apply decay
-
-    if opt.adam:
-        optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
-    else:
-        optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
-
-    optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']})  # add pg1 with weight_decay
-    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
-    logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
-    del pg0, pg1, pg2
+    accumulate = max(round(nbs / batch_size), 1)  # accumulate loss before optimizing
+    hyp['weight_decay'] *= batch_size * accumulate / nbs  # scale weight_decay
+    optimizer = smart_optimizer(model, opt.optimizer, hyp['lr0'], hyp['momentum'], hyp['weight_decay'])
 
-    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
-    # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
-    if opt.linear_lr:
-        lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - hyp['lrf']) + hyp['lrf']  # linear
-    else:
+    # Scheduler
+    if opt.cos_lr:
         lf = one_cycle(1, hyp['lrf'], epochs)  # cosine 1->hyp['lrf']
-    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
-    # plot_lr_scheduler(optimizer, scheduler, epochs)
+    else:
+        lf = lambda x: (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf']  # linear
+    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)  # plot_lr_scheduler(optimizer, scheduler, epochs)
 
     # EMA
-    ema = ModelEMA(model) if rank in [-1, 0] else None
+    ema = ModelEMA(model) if RANK in {-1, 0} else None
 
     # Resume
-    start_epoch, best_fitness = 0, 0.0
+    best_fitness, start_epoch = 0.0, 0
     if pretrained:
-        # Optimizer
-        if ckpt['optimizer'] is not None:
-            optimizer.load_state_dict(ckpt['optimizer'])
-            best_fitness = ckpt['best_fitness']
-
-        # EMA
-        if ema and ckpt.get('ema'):
-            ema.ema.load_state_dict(ckpt['ema'].float().state_dict())
-            ema.updates = ckpt['updates']
-
-        # Results
-        if ckpt.get('training_results') is not None:
-            results_file.write_text(ckpt['training_results'])  # write results.txt
-
-        # Epochs
-        start_epoch = ckpt['epoch'] + 1
-        if opt.resume:
-            assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % (weights, epochs)
-        if epochs < start_epoch:
-            logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
-                        (weights, ckpt['epoch'], epochs))
-            epochs += ckpt['epoch']  # finetune additional epochs
-
-        del ckpt, state_dict
-
-    # Image sizes
-    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
-    nl = model.model[-1].nl  # number of detection layers (used for scaling hyp['obj'])
-    imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size]  # verify imgsz are gs-multiples
+        if resume:
+            best_fitness, start_epoch, epochs = smart_resume(ckpt, optimizer, ema, weights, epochs, resume)
+        del ckpt, csd
 
     # DP mode
-    if cuda and rank == -1 and torch.cuda.device_count() > 1:
+    if cuda and RANK == -1 and torch.cuda.device_count() > 1:
+        LOGGER.warning('WARNING ⚠️ DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.\n'
+                       'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.')
         model = torch.nn.DataParallel(model)
 
     # SyncBatchNorm
-    if opt.sync_bn and cuda and rank != -1:
+    if opt.sync_bn and cuda and RANK != -1:
         model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
-        logger.info('Using SyncBatchNorm()')
+        LOGGER.info('Using SyncBatchNorm()')
 
     # Trainloader
-    dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt,
-                                            hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank,
-                                            world_size=opt.world_size, workers=opt.workers,
-                                            image_weights=opt.image_weights, quad=opt.quad, prefix=colorstr('train: '))
-    mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
-    nb = len(dataloader)  # number of batches
-    assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1)
+    train_loader, dataset = create_dataloader(train_path,
+                                              imgsz,
+                                              batch_size // WORLD_SIZE,
+                                              gs,
+                                              single_cls,
+                                              hyp=hyp,
+                                              augment=True,
+                                              cache=None if opt.cache == 'val' else opt.cache,
+                                              rect=opt.rect,
+                                              rank=LOCAL_RANK,
+                                              workers=workers,
+                                              image_weights=opt.image_weights,
+                                              quad=opt.quad,
+                                              prefix=colorstr('train: '),
+                                              shuffle=True,
+                                              seed=opt.seed)
+    labels = np.concatenate(dataset.labels, 0)
+    mlc = int(labels[:, 0].max())  # max label class
+    assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}'
 
     # Process 0
-    if rank in [-1, 0]:
-        testloader = create_dataloader(test_path, imgsz_test, batch_size * 2, gs, opt,  # testloader
-                                       hyp=hyp, cache=opt.cache_images and not opt.notest, rect=True, rank=-1,
-                                       world_size=opt.world_size, workers=opt.workers,
-                                       pad=0.5, prefix=colorstr('val: '))[0]
-        if not opt.resume:
-            labels = np.concatenate(dataset.labels, 0)
-            c = torch.tensor(labels[:, 0])  # classes
-            # cf = torch.bincount(c.long(), minlength=nc) + 1.  # frequency
-            # model._initialize_biases(cf.to(device))
-            if plots:
-                plot_labels(labels, names, save_dir, loggers)
-                if opt.sly:
-                    upload_label_vis()
-                if tb_writer:
-                    tb_writer.add_histogram('classes', c, 0)
-
-            # Anchors
+    if RANK in {-1, 0}:
+        val_loader = create_dataloader(val_path,
+                                       imgsz,
+                                       batch_size // WORLD_SIZE * 2,
+                                       gs,
+                                       single_cls,
+                                       hyp=hyp,
+                                       cache=None if noval else opt.cache,
+                                       rect=True,
+                                       rank=-1,
+                                       workers=workers * 2,
+                                       pad=0.5,
+                                       prefix=colorstr('val: '))[0]
+
+        if not resume:
             if not opt.noautoanchor:
-                check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
+                check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)  # run AutoAnchor
             model.half().float()  # pre-reduce anchor precision
 
+        callbacks.run('on_pretrain_routine_end', labels, names)
+
     # DDP mode
-    if cuda and rank != -1:
-        model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank,
-                    # nn.MultiheadAttention incompatibility with DDP https://github.com/pytorch/pytorch/issues/26698
-                    find_unused_parameters=any(isinstance(layer, nn.MultiheadAttention) for layer in model.modules()))
-
-    # Model parameters
-    hyp['box'] *= 3. / nl  # scale to layers
-    hyp['cls'] *= nc / 80. * 3. / nl  # scale to classes and layers
-    hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl  # scale to image size and layers
+    if cuda and RANK != -1:
+        model = smart_DDP(model)
+
+    # Model attributes
+    nl = de_parallel(model).model[-1].nl  # number of detection layers (to scale hyps)
+    hyp['box'] *= 3 / nl  # scale to layers
+    hyp['cls'] *= nc / 80 * 3 / nl  # scale to classes and layers
+    hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl  # scale to image size and layers
     hyp['label_smoothing'] = opt.label_smoothing
     model.nc = nc  # attach number of classes to model
     model.hyp = hyp  # attach hyperparameters to model
-    model.gr = 1.0  # iou loss ratio (obj_loss = 1.0 or iou)
     model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc  # attach class weights
     model.names = names
-    model.colors = colors
-    model.img_size = opt.img_size
 
     # Start training
     t0 = time.time()
-    nw = max(round(hyp['warmup_epochs'] * nb), 1000)  # number of warmup iterations, max(3 epochs, 1k iterations)
+    nb = len(train_loader)  # number of batches
+    nw = max(round(hyp['warmup_epochs'] * nb), 100)  # number of warmup iterations, max(3 epochs, 100 iterations)
     # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
+    last_opt_step = -1
     maps = np.zeros(nc)  # mAP per class
     results = (0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
     scheduler.last_epoch = start_epoch - 1  # do not move
-    scaler = amp.GradScaler(enabled=cuda)
+    scaler = torch.cuda.amp.GradScaler(enabled=amp)
+    stopper, stop = EarlyStopping(patience=opt.patience), False
     compute_loss = ComputeLoss(model)  # init loss class
-    logger.info(f'Image sizes {imgsz} train, {imgsz_test} test\n'
-                f'Using {dataloader.num_workers} dataloader workers\n'
-                f'Logging results to {save_dir}\n'
+    callbacks.run('on_train_start')
+    LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n'
+                f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n'
+                f"Logging results to {colorstr('bold', save_dir)}\n"
                 f'Starting training for {epochs} epochs...')
     sly_epoch_progress = sly.Progress("Epoch", epochs)
     for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
+        callbacks.run('on_train_epoch_start')
         model.train()
 
-        # Update image weights (optional)
+        # Update image weights (optional, single-GPU only)
         if opt.image_weights:
-            # Generate indices
-            if rank in [-1, 0]:
-                cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc  # class weights
-                iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw)  # image weights
-                dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n)  # rand weighted idx
-            # Broadcast if DDP
-            if rank != -1:
-                indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int()
-                dist.broadcast(indices, 0)
-                if rank != 0:
-                    dataset.indices = indices.cpu().numpy()
-
-        # Update mosaic border
+            cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc  # class weights
+            iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw)  # image weights
+            dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n)  # rand weighted idx
+
+        # Update mosaic border (optional)
         # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
         # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders
 
-        mloss = torch.zeros(4, device=device)  # mean losses
-        if rank != -1:
-            dataloader.sampler.set_epoch(epoch)
-        pbar = enumerate(dataloader)
-        #logger.info(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'total', 'labels', 'img_size'))
-        if rank in [-1, 0]:
-            #pbar = tqdm(pbar, total=nb)  # progress bar
-            pass
+        mloss = torch.zeros(3, device=device)  # mean losses
+        if RANK != -1:
+            train_loader.sampler.set_epoch(epoch)
+        pbar = enumerate(train_loader)
+        LOGGER.info(('\n' + '%11s' * 7) % ('Epoch', 'GPU_mem', 'box_loss', 'obj_loss', 'cls_loss', 'Instances', 'Size'))
+        if RANK in {-1, 0}:
+            pbar = tqdm(pbar, total=nb, bar_format=TQDM_BAR_FORMAT)  # progress bar
         optimizer.zero_grad()
         for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
+            callbacks.run('on_train_batch_start')
             ni = i + nb * epoch  # number integrated batches (since train start)
-            imgs = imgs.to(device, non_blocking=True).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0
+            imgs = imgs.to(device, non_blocking=True).float() / 255  # uint8 to float32, 0-255 to 0.0-1.0
 
             # Warmup
             if ni <= nw:
                 xi = [0, nw]  # x interp
-                # model.gr = np.interp(ni, xi, [0.0, 1.0])  # iou loss ratio (obj_loss = 1.0 or iou)
-                accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round())
+                # compute_loss.gr = np.interp(ni, xi, [0.0, 1.0])  # iou loss ratio (obj_loss = 1.0 or iou)
+                accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
                 for j, x in enumerate(optimizer.param_groups):
                     # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
-                    x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
+                    x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 0 else 0.0, x['initial_lr'] * lf(epoch)])
                     if 'momentum' in x:
                         x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])
 
             # Multi-scale
             if opt.multi_scale:
-                sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs  # size
+                sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5) + gs) // gs * gs  # size
                 sf = sz / max(imgs.shape[2:])  # scale factor
                 if sf != 1:
                     ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
-                    imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)
+                    imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)
 
             # Forward
-            with amp.autocast(enabled=cuda):
+            with torch.cuda.amp.autocast(amp):
                 pred = model(imgs)  # forward
                 loss, loss_items = compute_loss(pred, targets.to(device))  # loss scaled by batch_size
-                if rank != -1:
-                    loss *= opt.world_size  # gradient averaged between devices in DDP mode
+                if RANK != -1:
+                    loss *= WORLD_SIZE  # gradient averaged between devices in DDP mode
                 if opt.quad:
                     loss *= 4.
 
             # Backward
             scaler.scale(loss).backward()
 
-            # Optimize
-            if ni % accumulate == 0:
+            # Optimize - https://pytorch.org/docs/master/notes/amp_examples.html
+            if ni - last_opt_step >= accumulate:
+                scaler.unscale_(optimizer)  # unscale gradients
+                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)  # clip gradients
                 scaler.step(optimizer)  # optimizer.step
                 scaler.update()
                 optimizer.zero_grad()
                 if ema:
                     ema.update(model)
+                last_opt_step = ni
 
-            # Print
-            if rank in [-1, 0]:
+            # Log
+            if RANK in {-1, 0}:
                 mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
-                mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
-                s = ('%10s' * 2 + '%10.4g' * 6) % (
-                    '%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1])
-                #pbar.set_description(s)
-                #'Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'total', 'labels', 'img_size'
-                logger.info("Training", extra={
-                    "epoch": epoch,
-                    "epochs_count": epochs - 1,
-                    "gpu_mem": mem,
-                    "mbox_loss": float(mloss[0].cpu().numpy()),
-                    "mobj_loss": float(mloss[1].cpu().numpy()),
-                    "mcls_loss": float(mloss[2].cpu().numpy()),
-                    "mtotal_loss": float(mloss[3].cpu().numpy()),
-                    "labels": targets.shape[0],
-                    "img_size": imgs.shape[-1]
-                })
-
-
-                # Plot
-                if plots and ni < 3:
-                    f = save_dir / f'train_batch{ni}.jpg'  # filename
-                    plot_images(imgs, targets, paths, f)
-                    #Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start()
-                    # if tb_writer:
-                    #     tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
-                    #     tb_writer.add_graph(torch.jit.trace(model, imgs, strict=False), [])  # add model graph
-                elif plots and ni == 10 and wandb_logger.wandb:
-                    wandb_logger.log({"Mosaics": [wandb_logger.wandb.Image(str(x), caption=x.name) for x in
-                                                  save_dir.glob('train*.jpg') if x.exists()]})
+                mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G'  # (GB)
+                pbar.set_description(('%11s' * 2 + '%11.4g' * 5) %
+                                     (f'{epoch}/{epochs - 1}', mem, *mloss, targets.shape[0], imgs.shape[-1]))
+                callbacks.run('on_train_batch_end', model, ni, imgs, targets, paths, list(mloss))
                 if plots and ni == 10 and opt.sly:
                     train_batches_uploaded = True
                     upload_train_data_vis()
-
-
+                if callbacks.stop_training:
+                    return
             # end batch ------------------------------------------------------------------------------------------------
-        # end epoch ----------------------------------------------------------------------------------------------------
 
         # Scheduler
-        lr = [x['lr'] for x in optimizer.param_groups]  # for tensorboard
+        lr = [x['lr'] for x in optimizer.param_groups]  # for loggers
         scheduler.step()
 
-        # DDP process 0 or single-GPU
-        if rank in [-1, 0]:
+        if RANK in {-1, 0}:
             # mAP
-            ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride', 'class_weights'])
-            final_epoch = epoch + 1 == epochs
-            if not opt.notest or final_epoch:  # Calculate mAP
-                wandb_logger.current_epoch = epoch + 1
-                results, maps, times = test.test(data_dict,
-                                                 batch_size=batch_size * 2,
-                                                 imgsz=imgsz_test,
-                                                 model=ema.ema,
-                                                 single_cls=opt.single_cls,
-                                                 dataloader=testloader,
-                                                 save_dir=save_dir,
-                                                 verbose=nc < 50 and final_epoch,
-                                                 plots=plots and final_epoch,
-                                                 wandb_logger=wandb_logger,
-                                                 compute_loss=compute_loss,
-                                                 is_coco=is_coco,
-                                                 opt_sly=opt.sly)
-
-            # Write
-            with open(results_file, 'a') as f:
-                f.write(s + '%10.4g' * 7 % results + '\n')  # append metrics, val_loss
-            if len(opt.name) and opt.bucket:
-                os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name))
-
-            # Log
-            if opt.sly:
-                send_epoch_log(epoch, epochs, sly_epoch_progress)
-                metrics = {}
-            tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss',  # train loss
-                    'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
-                    'val/box_loss', 'val/obj_loss', 'val/cls_loss',  # val loss
-                    'x/lr0', 'x/lr1', 'x/lr2']  # params
-            for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags):
-                if tb_writer:
-                    tb_writer.add_scalar(tag, x, epoch)  # tensorboard
-                if wandb_logger.wandb:
-                    wandb_logger.log({tag: x})  # W&B
-                if opt.sly:
-                    if torch.is_tensor(x):
-                        x = float(x.cpu().numpy())
-                    metrics[tag] = x
-
-            if opt.sly:
-                send_metrics(epoch, epochs, metrics, opt.metrics_period)
+            callbacks.run('on_train_epoch_end', epoch=epoch)
+            ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights'])
+            final_epoch = (epoch + 1 == epochs) or stopper.possible_stop
+            if not noval or final_epoch:  # Calculate mAP
+                results, maps, _ = test.run(data_dict,
+                                                batch_size=batch_size // WORLD_SIZE * 2,
+                                                imgsz=imgsz,
+                                                half=amp,
+                                                model=ema.ema,
+                                                single_cls=single_cls,
+                                                dataloader=val_loader,
+                                                save_dir=save_dir,
+                                                plots=False,
+                                                callbacks=callbacks,
+                                                compute_loss=compute_loss,
+                                                opt_sly=opt.sly)
 
             # Update best mAP
             fi = fitness(np.array(results).reshape(1, -1))  # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
+            stop = stopper(epoch=epoch, fitness=fi)  # early stop check
             if fi > best_fitness:
                 best_fitness = fi
-            wandb_logger.end_epoch(best_result=best_fitness == fi)
+            log_vals = list(mloss) + list(results) + lr
+            if opt.sly:
+                send_epoch_log(epoch, epochs, sly_epoch_progress)
+            callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi)
 
             # Save model
-            if (not opt.nosave) or (final_epoch and not opt.evolve):  # if save
-                model.img_size = opt.img_size
-                ema.ema.img_size = opt.img_size
-                ckpt = {'epoch': epoch,
-                        'best_fitness': best_fitness,
-                        'training_results': results_file.read_text(),
-                        'model': deepcopy(model.module if is_parallel(model) else model).half(),
-                        'ema': deepcopy(ema.ema).half(),
-                        'updates': ema.updates,
-                        'optimizer': optimizer.state_dict(),
-                        'wandb_id': wandb_logger.wandb_run.id if wandb_logger.wandb else None}
+            if (not nosave) or (final_epoch and not evolve):  # if save
+                ckpt = {
+                    'epoch': epoch,
+                    'best_fitness': best_fitness,
+                    'model': deepcopy(de_parallel(model)).half(),
+                    'ema': deepcopy(ema.ema).half(),
+                    'updates': ema.updates,
+                    'optimizer': optimizer.state_dict(),
+                    'opt': vars(opt),
+                    'git': GIT_INFO,  # {remote, branch, commit} if a git repo
+                    'date': datetime.now().isoformat()}
 
                 # Save last, best and delete
                 torch.save(ckpt, last)
                 if best_fitness == fi:
                     torch.save(ckpt, best)
-                if wandb_logger.wandb:
-                    if ((epoch + 1) % opt.save_period == 0 and not final_epoch) and opt.save_period != -1:
-                        wandb_logger.log_model(
-                            last.parent, opt, epoch, fi, best_model=best_fitness == fi)
+                if opt.save_period > 0 and epoch % opt.save_period == 0:
+                    torch.save(ckpt, w / f'epoch{epoch}.pt')
                 del ckpt
+                callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi)
+
+        # EarlyStopping
+        if RANK != -1:  # if DDP training
+            broadcast_list = [stop if RANK == 0 else None]
+            dist.broadcast_object_list(broadcast_list, 0)  # broadcast 'stop' to all ranks
+            if RANK != 0:
+                stop = broadcast_list[0]
+        if stop:
+            break  # must break all DDP ranks
 
         # end epoch ----------------------------------------------------------------------------------------------------
     if plots and opt.sly and train_batches_uploaded is False:
         train_batches_uploaded = True
         upload_train_data_vis()
-
-    # end training
-    if rank in [-1, 0]:
-        # Plots
-        if plots:
-            plot_results(save_dir=save_dir)  # save as results.png
-            if wandb_logger.wandb:
-                files = ['results.png', 'confusion_matrix.png', *[f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R')]]
-                wandb_logger.log({"Results": [wandb_logger.wandb.Image(str(save_dir / f), caption=f) for f in files
-                                              if (save_dir / f).exists()]})
-        # Test best.pt
-        logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
-        if opt.data.endswith('coco.yaml') and nc == 80:  # if COCO
-            for m in (last, best) if best.exists() else (last):  # speed, mAP tests
-                results, _, _ = test.test(opt.data,
-                                          batch_size=batch_size * 2,
-                                          imgsz=imgsz_test,
-                                          conf_thres=0.001,
-                                          iou_thres=0.7,
-                                          model=attempt_load(m, device).half(),
-                                          single_cls=opt.single_cls,
-                                          dataloader=testloader,
-                                          save_dir=save_dir,
-                                          save_json=True,
-                                          plots=False,
-                                          is_coco=is_coco)
-
-        # Strip optimizers
-        final = best if best.exists() else last  # final model
+    # end training -----------------------------------------------------------------------------------------------------
+    if RANK in {-1, 0}:
+        LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.')
         for f in last, best:
             if f.exists():
                 strip_optimizer(f)  # strip optimizers
-        if opt.bucket:
-            os.system(f'gsutil cp {final} gs://{opt.bucket}/weights')  # upload
-        if wandb_logger.wandb and not opt.evolve:  # Log the stripped model
-            wandb_logger.wandb.log_artifact(str(final), type='model',
-                                            name='run_' + wandb_logger.wandb_run.id + '_model',
-                                            aliases=['last', 'best', 'stripped'])
-        wandb_logger.finish_run()
-    else:
-        dist.destroy_process_group()
+                if f is best:
+                    LOGGER.info(f'\nValidating {f}...')
+                    results, _, _ = test.run(
+                        data_dict,
+                        batch_size=batch_size // WORLD_SIZE * 2,
+                        imgsz=imgsz,
+                        model=attempt_load(f, device).half(),
+                        iou_thres=0.65 if is_coco else 0.60,  # best pycocotools at iou 0.65
+                        single_cls=single_cls,
+                        dataloader=val_loader,
+                        save_dir=save_dir,
+                        save_json=is_coco,
+                        verbose=True,
+                        plots=plots,
+                        callbacks=callbacks,
+                        compute_loss=compute_loss)  # val best model with plots
+                    if is_coco:
+                        callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi)
+
+        callbacks.run('on_train_end', last, best, epoch, results)
+
     torch.cuda.empty_cache()
     return results
 
 
-def main():
+def parse_opt(known=False):
     parser = argparse.ArgumentParser()
-    parser.add_argument('--weights', type=str, default='yolov5s.pt', help='initial weights path')
+    parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path')
     parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
-    parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path')
-    parser.add_argument('--hyp', type=str, default='data/hyp.scratch.yaml', help='hyperparameters path')
-    parser.add_argument('--epochs', type=int, default=300)
-    parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs')
-    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='[train, test] image sizes')
+    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
+    parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
+    parser.add_argument('--epochs', type=int, default=100, help='total training epochs')
+    parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')
+    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
     parser.add_argument('--rect', action='store_true', help='rectangular training')
     parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
     parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
-    parser.add_argument('--notest', action='store_true', help='only test final epoch')
-    parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
-    parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
+    parser.add_argument('--noval', action='store_true', help='only validate final epoch')
+    parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor')
+    parser.add_argument('--noplots', action='store_true', help='save no plot files')
+    parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
     parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
-    parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
+    parser.add_argument('--cache', type=str, nargs='?', const='ram', help='image --cache ram/disk')
     parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
     parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
     parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
-    parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer')
+    parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer')
     parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
-    parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
-    parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers')
-    parser.add_argument('--project', default='runs/train', help='save to project/name')
-    parser.add_argument('--entity', default=None, help='W&B entity')
+    parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
+    parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name')
     parser.add_argument('--name', default='exp', help='save to project/name')
     parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
     parser.add_argument('--quad', action='store_true', help='quad dataloader')
-    parser.add_argument('--linear-lr', action='store_true', help='linear LR')
+    parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler')
     parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
-    parser.add_argument('--upload_dataset', action='store_true', help='Upload dataset as W&B artifact table')
-    parser.add_argument('--bbox_interval', type=int, default=-1, help='Set bounding-box image logging interval for W&B')
-    parser.add_argument('--save_period', type=int, default=-1, help='Log model after every "save_period" epoch')
-    parser.add_argument('--artifact_alias', type=str, default="latest", help='version of dataset artifact to be used')
+    parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
+    parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
+    parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
+    parser.add_argument('--seed', type=int, default=0, help='Global training seed')
+    parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
     parser.add_argument('--sly', action='store_true', help='for Supervisely App integration')
     parser.add_argument('--metrics_period', type=int, default=1, help='Log metrics to Supervisely every "metrics_period" epochs')
+    
+    # Logger arguments
+    parser.add_argument('--entity', default=None, help='Entity')
+    parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='Upload data, "val" option')
+    parser.add_argument('--bbox_interval', type=int, default=-1, help='Set bounding-box image logging interval')
+    parser.add_argument('--artifact_alias', type=str, default='latest', help='Version of dataset artifact to use')
 
-    opt = parser.parse_args()
-    print("Input arguments:", opt)
+    return parser.parse_known_args()[0] if known else parser.parse_args()
 
-    # Set DDP variables
-    opt.world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1
-    opt.global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else -1
-    set_logging(opt.global_rank)
-    if opt.global_rank in [-1, 0]:
-        check_git_status()
-        #check_requirements()
 
-    # Resume
-    wandb_run = check_wandb_resume(opt)
-    if opt.resume and not wandb_run:  # resume an interrupted run
-        ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run()  # specified or most recent path
-        assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist'
-        apriori = opt.global_rank, opt.local_rank
-        with open(Path(ckpt).parent.parent / 'opt.yaml') as f:
-            opt = argparse.Namespace(**yaml.safe_load(f))  # replace
-        opt.cfg, opt.weights, opt.resume, opt.batch_size, opt.global_rank, opt.local_rank = \
-            '', ckpt, True, opt.total_batch_size, *apriori  # reinstate
-        logger.info('Resuming training from %s' % ckpt)
+def main(opt, callbacks=Callbacks()):
+    # Checks
+    if RANK in {-1, 0}:
+        print_args(vars(opt))
+        check_git_status()
+        check_requirements()
+
+    # Resume (from specified or most recent last.pt)
+    if opt.resume and not check_comet_resume(opt) and not opt.evolve:
+        last = Path(check_file(opt.resume) if isinstance(opt.resume, str) else get_latest_run())
+        opt_yaml = last.parent.parent / 'opt.yaml'  # train options yaml
+        opt_data = opt.data  # original dataset
+        if opt_yaml.is_file():
+            with open(opt_yaml, errors='ignore') as f:
+                d = yaml.safe_load(f)
+        else:
+            d = torch.load(last, map_location='cpu')['opt']
+        opt = argparse.Namespace(**d)  # replace
+        opt.cfg, opt.weights, opt.resume = '', str(last), True  # reinstate
+        if is_url(opt_data):
+            opt.data = check_file(opt_data)  # avoid HUB resume auth timeout
     else:
-        # opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml')
-        opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file(opt.cfg), check_file(opt.hyp)  # check files
+        opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \
+            check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project)  # checks
         assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
-        opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size)))  # extend to 2 sizes (train, test)
-        opt.name = 'evolve' if opt.evolve else opt.name
-        opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok | opt.evolve))
+        if opt.evolve:
+            if opt.project == str(ROOT / 'runs/train'):  # if default project name, rename to runs/evolve
+                opt.project = str(ROOT / 'runs/evolve')
+            opt.exist_ok, opt.resume = opt.resume, False  # pass resume to exist_ok and disable resume
+        if opt.name == 'cfg':
+            opt.name = Path(opt.cfg).stem  # use model.yaml as name
+        opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))
 
     # DDP mode
-    opt.total_batch_size = opt.batch_size
     device = select_device(opt.device, batch_size=opt.batch_size)
-    if opt.local_rank != -1:
-        assert torch.cuda.device_count() > opt.local_rank
-        torch.cuda.set_device(opt.local_rank)
-        device = torch.device('cuda', opt.local_rank)
-        dist.init_process_group(backend='nccl', init_method='env://')  # distributed backend
-        assert opt.batch_size % opt.world_size == 0, '--batch-size must be multiple of CUDA device count'
-        opt.batch_size = opt.total_batch_size // opt.world_size
-
-    # Hyperparameters
-    with open(opt.hyp) as f:
-        hyp = yaml.safe_load(f)  # load hyps
+    if LOCAL_RANK != -1:
+        msg = 'is not compatible with YOLOv5 Multi-GPU DDP training'
+        assert not opt.image_weights, f'--image-weights {msg}'
+        assert not opt.evolve, f'--evolve {msg}'
+        assert opt.batch_size != -1, f'AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size'
+        assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE'
+        assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
+        torch.cuda.set_device(LOCAL_RANK)
+        device = torch.device('cuda', LOCAL_RANK)
+        dist.init_process_group(backend='nccl' if dist.is_nccl_available() else 'gloo')
 
     # Train
-    logger.info(opt)
     if not opt.evolve:
-        tb_writer = None  # init loggers
-        if opt.global_rank in [-1, 0]:
-            #prefix = colorstr('tensorboard: ')
-            #logger.info(f"{prefix}Start with 'tensorboard --logdir {opt.project}', view at http://localhost:6006/")
-            tb_writer = SummaryWriter(opt.save_dir)  # Tensorboard
-        train(hyp, opt, device, tb_writer)
+        train(opt.hyp, opt, device, callbacks)
 
     # Evolve hyperparameters (optional)
     else:
         # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
-        meta = {'lr0': (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
-                'lrf': (1, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
-                'momentum': (0.3, 0.6, 0.98),  # SGD momentum/Adam beta1
-                'weight_decay': (1, 0.0, 0.001),  # optimizer weight decay
-                'warmup_epochs': (1, 0.0, 5.0),  # warmup epochs (fractions ok)
-                'warmup_momentum': (1, 0.0, 0.95),  # warmup initial momentum
-                'warmup_bias_lr': (1, 0.0, 0.2),  # warmup initial bias lr
-                'box': (1, 0.02, 0.2),  # box loss gain
-                'cls': (1, 0.2, 4.0),  # cls loss gain
-                'cls_pw': (1, 0.5, 2.0),  # cls BCELoss positive_weight
-                'obj': (1, 0.2, 4.0),  # obj loss gain (scale with pixels)
-                'obj_pw': (1, 0.5, 2.0),  # obj BCELoss positive_weight
-                'iou_t': (0, 0.1, 0.7),  # IoU training threshold
-                'anchor_t': (1, 2.0, 8.0),  # anchor-multiple threshold
-                'anchors': (2, 2.0, 10.0),  # anchors per output grid (0 to ignore)
-                'fl_gamma': (0, 0.0, 2.0),  # focal loss gamma (efficientDet default gamma=1.5)
-                'hsv_h': (1, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
-                'hsv_s': (1, 0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
-                'hsv_v': (1, 0.0, 0.9),  # image HSV-Value augmentation (fraction)
-                'degrees': (1, 0.0, 45.0),  # image rotation (+/- deg)
-                'translate': (1, 0.0, 0.9),  # image translation (+/- fraction)
-                'scale': (1, 0.0, 0.9),  # image scale (+/- gain)
-                'shear': (1, 0.0, 10.0),  # image shear (+/- deg)
-                'perspective': (0, 0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
-                'flipud': (1, 0.0, 1.0),  # image flip up-down (probability)
-                'fliplr': (0, 0.0, 1.0),  # image flip left-right (probability)
-                'mosaic': (1, 0.0, 1.0),  # image mixup (probability)
-                'mixup': (1, 0.0, 1.0)}  # image mixup (probability)
-
-        assert opt.local_rank == -1, 'DDP mode not implemented for --evolve'
-        opt.notest, opt.nosave = True, True  # only test/save final epoch
+        meta = {
+            'lr0': (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
+            'lrf': (1, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
+            'momentum': (0.3, 0.6, 0.98),  # SGD momentum/Adam beta1
+            'weight_decay': (1, 0.0, 0.001),  # optimizer weight decay
+            'warmup_epochs': (1, 0.0, 5.0),  # warmup epochs (fractions ok)
+            'warmup_momentum': (1, 0.0, 0.95),  # warmup initial momentum
+            'warmup_bias_lr': (1, 0.0, 0.2),  # warmup initial bias lr
+            'box': (1, 0.02, 0.2),  # box loss gain
+            'cls': (1, 0.2, 4.0),  # cls loss gain
+            'cls_pw': (1, 0.5, 2.0),  # cls BCELoss positive_weight
+            'obj': (1, 0.2, 4.0),  # obj loss gain (scale with pixels)
+            'obj_pw': (1, 0.5, 2.0),  # obj BCELoss positive_weight
+            'iou_t': (0, 0.1, 0.7),  # IoU training threshold
+            'anchor_t': (1, 2.0, 8.0),  # anchor-multiple threshold
+            'anchors': (2, 2.0, 10.0),  # anchors per output grid (0 to ignore)
+            'fl_gamma': (0, 0.0, 2.0),  # focal loss gamma (efficientDet default gamma=1.5)
+            'hsv_h': (1, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
+            'hsv_s': (1, 0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
+            'hsv_v': (1, 0.0, 0.9),  # image HSV-Value augmentation (fraction)
+            'degrees': (1, 0.0, 45.0),  # image rotation (+/- deg)
+            'translate': (1, 0.0, 0.9),  # image translation (+/- fraction)
+            'scale': (1, 0.0, 0.9),  # image scale (+/- gain)
+            'shear': (1, 0.0, 10.0),  # image shear (+/- deg)
+            'perspective': (0, 0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
+            'flipud': (1, 0.0, 1.0),  # image flip up-down (probability)
+            'fliplr': (0, 0.0, 1.0),  # image flip left-right (probability)
+            'mosaic': (1, 0.0, 1.0),  # image mixup (probability)
+            'mixup': (1, 0.0, 1.0),  # image mixup (probability)
+            'copy_paste': (1, 0.0, 1.0)}  # segment copy-paste (probability)
+
+        with open(opt.hyp, errors='ignore') as f:
+            hyp = yaml.safe_load(f)  # load hyps dict
+            if 'anchors' not in hyp:  # anchors commented in hyp.yaml
+                hyp['anchors'] = 3
+        if opt.noautoanchor:
+            del hyp['anchors'], meta['anchors']
+        opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir)  # only val/save final epoch
         # ei = [isinstance(x, (int, float)) for x in hyp.values()]  # evolvable indices
-        yaml_file = Path(opt.save_dir) / 'hyp_evolved.yaml'  # save best result here
+        evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv'
         if opt.bucket:
-            os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket)  # download evolve.txt if exists
-
-        for _ in range(300):  # generations to evolve
-            if Path('evolve.txt').exists():  # if evolve.txt exists: select best hyps and mutate
+            # download evolve.csv if exists
+            subprocess.run([
+                'gsutil',
+                'cp',
+                f'gs://{opt.bucket}/evolve.csv',
+                str(evolve_csv),])
+
+        for _ in range(opt.evolve):  # generations to evolve
+            if evolve_csv.exists():  # if evolve.csv exists: select best hyps and mutate
                 # Select parent(s)
                 parent = 'single'  # parent selection method: 'single' or 'weighted'
-                x = np.loadtxt('evolve.txt', ndmin=2)
+                x = np.loadtxt(evolve_csv, ndmin=2, delimiter=',', skiprows=1)
                 n = min(5, len(x))  # number of previous results to consider
                 x = x[np.argsort(-fitness(x))][:n]  # top n mutations
-                w = fitness(x) - fitness(x).min()  # weights
+                w = fitness(x) - fitness(x).min() + 1E-6  # weights (sum > 0)
                 if parent == 'single' or len(x) == 1:
                     # x = x[random.randint(0, n - 1)]  # random selection
                     x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
@@ -654,7 +620,7 @@ def main():
                 mp, s = 0.8, 0.2  # mutation probability, sigma
                 npr = np.random
                 npr.seed(int(time.time()))
-                g = np.array([x[0] for x in meta.values()])  # gains 0-1
+                g = np.array([meta[k][0] for k in hyp.keys()])  # gains 0-1
                 ng = len(meta)
                 v = np.ones(ng)
                 while all(v == 1):  # mutate until a change occurs (prevent duplicates)
@@ -669,16 +635,29 @@ def main():
                 hyp[k] = round(hyp[k], 5)  # significant digits
 
             # Train mutation
-            results = train(hyp.copy(), opt, device)
-
+            results = train(hyp.copy(), opt, device, callbacks)
+            callbacks = Callbacks()
             # Write mutation results
-            print_mutation(hyp.copy(), results, yaml_file, opt.bucket)
+            keys = ('metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', 'val/box_loss',
+                    'val/obj_loss', 'val/cls_loss')
+            print_mutation(keys, results, hyp.copy(), save_dir, opt.bucket)
 
         # Plot results
-        plot_evolution(yaml_file)
-        print(f'Hyperparameter evolution complete. Best results saved as: {yaml_file}\n'
-              f'Command to train a new model with these hyperparameters: $ python train.py --hyp {yaml_file}')
+        plot_evolve(evolve_csv)
+        LOGGER.info(f'Hyperparameter evolution finished {opt.evolve} generations\n'
+                    f"Results saved to {colorstr('bold', save_dir)}\n"
+                    f'Usage example: $ python train.py --hyp {evolve_yaml}')
+
+
+def run(**kwargs):
+    # Usage: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt')
+    opt = parse_opt(True)
+    for k, v in kwargs.items():
+        setattr(opt, k, v)
+    main(opt)
+    return opt
+
 
-#--data coco128.yaml --cfg yolov5s.yaml --weights yolov5s.pt --batch-size 64
 if __name__ == '__main__':
-    main()
\ No newline at end of file
+    opt = parse_opt()
+    main(opt)
diff --git a/utils/__init__.py b/utils/__init__.py
index e69de29bb2d1..5b9fcd517e03 100644
--- a/utils/__init__.py
+++ b/utils/__init__.py
@@ -0,0 +1,82 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+utils/initialization
+"""
+
+import contextlib
+import platform
+import threading
+
+
+def emojis(str=''):
+    # Return platform-dependent emoji-safe version of string
+    return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str
+
+
+class TryExcept(contextlib.ContextDecorator):
+    # YOLOv5 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager
+    def __init__(self, msg=''):
+        self.msg = msg
+
+    def __enter__(self):
+        pass
+
+    def __exit__(self, exc_type, value, traceback):
+        if value:
+            print(emojis(f"{self.msg}{': ' if self.msg else ''}{value}"))
+        return True
+
+
+def threaded(func):
+    # Multi-threads a target function and returns thread. Usage: @threaded decorator
+    def wrapper(*args, **kwargs):
+        thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True)
+        thread.start()
+        return thread
+
+    return wrapper
+
+
+def join_threads(verbose=False):
+    # Join all daemon threads, i.e. atexit.register(lambda: join_threads())
+    main_thread = threading.current_thread()
+    for t in threading.enumerate():
+        if t is not main_thread:
+            if verbose:
+                print(f'Joining thread {t.name}')
+            t.join()
+
+
+def notebook_init(verbose=True):
+    # Check system software and hardware
+    print('Checking setup...')
+
+    import os
+    import shutil
+
+    from utils.general import check_font, check_requirements, is_colab
+    from utils.torch_utils import select_device  # imports
+
+    check_font()
+
+    import psutil
+
+    if is_colab():
+        shutil.rmtree('/content/sample_data', ignore_errors=True)  # remove colab /sample_data directory
+
+    # System info
+    display = None
+    if verbose:
+        gb = 1 << 30  # bytes to GiB (1024 ** 3)
+        ram = psutil.virtual_memory().total
+        total, used, free = shutil.disk_usage('/')
+        with contextlib.suppress(Exception):  # clear display if ipython is installed
+            from IPython import display
+            display.clear_output()
+        s = f'({os.cpu_count()} CPUs, {ram / gb:.1f} GB RAM, {(total - free) / gb:.1f}/{total / gb:.1f} GB disk)'
+    else:
+        s = ''
+
+    select_device(newline=False)
+    print(emojis(f'Setup complete ✅ {s}'))
+    return display
diff --git a/utils/activations.py b/utils/activations.py
index 92a3b5eaa54b..084ce8c41230 100644
--- a/utils/activations.py
+++ b/utils/activations.py
@@ -1,33 +1,39 @@
-# Activation functions
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Activation functions
+"""
 
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 
 
-# SiLU https://arxiv.org/pdf/1606.08415.pdf ----------------------------------------------------------------------------
-class SiLU(nn.Module):  # export-friendly version of nn.SiLU()
+class SiLU(nn.Module):
+    # SiLU activation https://arxiv.org/pdf/1606.08415.pdf
     @staticmethod
     def forward(x):
         return x * torch.sigmoid(x)
 
 
-class Hardswish(nn.Module):  # export-friendly version of nn.Hardswish()
+class Hardswish(nn.Module):
+    # Hard-SiLU activation
     @staticmethod
     def forward(x):
-        # return x * F.hardsigmoid(x)  # for torchscript and CoreML
-        return x * F.hardtanh(x + 3, 0., 6.) / 6.  # for torchscript, CoreML and ONNX
+        # return x * F.hardsigmoid(x)  # for TorchScript and CoreML
+        return x * F.hardtanh(x + 3, 0.0, 6.0) / 6.0  # for TorchScript, CoreML and ONNX
 
 
-# Mish https://github.com/digantamisra98/Mish --------------------------------------------------------------------------
 class Mish(nn.Module):
+    # Mish activation https://github.com/digantamisra98/Mish
     @staticmethod
     def forward(x):
         return x * F.softplus(x).tanh()
 
 
 class MemoryEfficientMish(nn.Module):
+    # Mish activation memory-efficient
     class F(torch.autograd.Function):
+
         @staticmethod
         def forward(ctx, x):
             ctx.save_for_backward(x)
@@ -44,8 +50,8 @@ def forward(self, x):
         return self.F.apply(x)
 
 
-# FReLU https://arxiv.org/abs/2007.11824 -------------------------------------------------------------------------------
 class FReLU(nn.Module):
+    # FReLU activation https://arxiv.org/abs/2007.11824
     def __init__(self, c1, k=3):  # ch_in, kernel
         super().__init__()
         self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False)
@@ -55,9 +61,8 @@ def forward(self, x):
         return torch.max(x, self.bn(self.conv(x)))
 
 
-# ACON https://arxiv.org/pdf/2009.04759.pdf ----------------------------------------------------------------------------
 class AconC(nn.Module):
-    r""" ACON activation (activate or not).
+    r""" ACON activation (activate or not)
     AconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is a learnable parameter
     according to "Activate or Not: Learning Customized Activation" <https://arxiv.org/pdf/2009.04759.pdf>.
     """
@@ -74,7 +79,7 @@ def forward(self, x):
 
 
 class MetaAconC(nn.Module):
-    r""" ACON activation (activate or not).
+    r""" ACON activation (activate or not)
     MetaAconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is generated by a small network
     according to "Activate or Not: Learning Customized Activation" <https://arxiv.org/pdf/2009.04759.pdf>.
     """
diff --git a/utils/augmentations.py b/utils/augmentations.py
new file mode 100644
index 000000000000..7ab75f17fb18
--- /dev/null
+++ b/utils/augmentations.py
@@ -0,0 +1,397 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Image augmentation functions
+"""
+
+import math
+import random
+
+import cv2
+import numpy as np
+import torch
+import torchvision.transforms as T
+import torchvision.transforms.functional as TF
+
+from utils.general import LOGGER, check_version, colorstr, resample_segments, segment2box, xywhn2xyxy
+from utils.metrics import bbox_ioa
+
+IMAGENET_MEAN = 0.485, 0.456, 0.406  # RGB mean
+IMAGENET_STD = 0.229, 0.224, 0.225  # RGB standard deviation
+
+
+class Albumentations:
+    # YOLOv5 Albumentations class (optional, only used if package is installed)
+    def __init__(self, size=640):
+        self.transform = None
+        prefix = colorstr('albumentations: ')
+        try:
+            import albumentations as A
+            check_version(A.__version__, '1.0.3', hard=True)  # version requirement
+
+            T = [
+                A.RandomResizedCrop(height=size, width=size, scale=(0.8, 1.0), ratio=(0.9, 1.11), p=0.0),
+                A.Blur(p=0.01),
+                A.MedianBlur(p=0.01),
+                A.ToGray(p=0.01),
+                A.CLAHE(p=0.01),
+                A.RandomBrightnessContrast(p=0.0),
+                A.RandomGamma(p=0.0),
+                A.ImageCompression(quality_lower=75, p=0.0)]  # transforms
+            self.transform = A.Compose(T, bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))
+
+            LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p))
+        except ImportError:  # package not installed, skip
+            pass
+        except Exception as e:
+            LOGGER.info(f'{prefix}{e}')
+
+    def __call__(self, im, labels, p=1.0):
+        if self.transform and random.random() < p:
+            new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0])  # transformed
+            im, labels = new['image'], np.array([[c, *b] for c, b in zip(new['class_labels'], new['bboxes'])])
+        return im, labels
+
+
+def normalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD, inplace=False):
+    # Denormalize RGB images x per ImageNet stats in BCHW format, i.e. = (x - mean) / std
+    return TF.normalize(x, mean, std, inplace=inplace)
+
+
+def denormalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD):
+    # Denormalize RGB images x per ImageNet stats in BCHW format, i.e. = x * std + mean
+    for i in range(3):
+        x[:, i] = x[:, i] * std[i] + mean[i]
+    return x
+
+
+def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
+    # HSV color-space augmentation
+    if hgain or sgain or vgain:
+        r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
+        hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV))
+        dtype = im.dtype  # uint8
+
+        x = np.arange(0, 256, dtype=r.dtype)
+        lut_hue = ((x * r[0]) % 180).astype(dtype)
+        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
+        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
+
+        im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
+        cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im)  # no return needed
+
+
+def hist_equalize(im, clahe=True, bgr=False):
+    # Equalize histogram on BGR image 'im' with im.shape(n,m,3) and range 0-255
+    yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV)
+    if clahe:
+        c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
+        yuv[:, :, 0] = c.apply(yuv[:, :, 0])
+    else:
+        yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0])  # equalize Y channel histogram
+    return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB)  # convert YUV image to RGB
+
+
+def replicate(im, labels):
+    # Replicate labels
+    h, w = im.shape[:2]
+    boxes = labels[:, 1:].astype(int)
+    x1, y1, x2, y2 = boxes.T
+    s = ((x2 - x1) + (y2 - y1)) / 2  # side length (pixels)
+    for i in s.argsort()[:round(s.size * 0.5)]:  # smallest indices
+        x1b, y1b, x2b, y2b = boxes[i]
+        bh, bw = y2b - y1b, x2b - x1b
+        yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw))  # offset x, y
+        x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
+        im[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b]  # im4[ymin:ymax, xmin:xmax]
+        labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)
+
+    return im, labels
+
+
+def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
+    # Resize and pad image while meeting stride-multiple constraints
+    shape = im.shape[:2]  # current shape [height, width]
+    if isinstance(new_shape, int):
+        new_shape = (new_shape, new_shape)
+
+    # Scale ratio (new / old)
+    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+    if not scaleup:  # only scale down, do not scale up (for better val mAP)
+        r = min(r, 1.0)
+
+    # Compute padding
+    ratio = r, r  # width, height ratios
+    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
+    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
+    if auto:  # minimum rectangle
+        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
+    elif scaleFill:  # stretch
+        dw, dh = 0.0, 0.0
+        new_unpad = (new_shape[1], new_shape[0])
+        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios
+
+    dw /= 2  # divide padding into 2 sides
+    dh /= 2
+
+    if shape[::-1] != new_unpad:  # resize
+        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
+    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
+    return im, ratio, (dw, dh)
+
+
+def random_perspective(im,
+                       targets=(),
+                       segments=(),
+                       degrees=10,
+                       translate=.1,
+                       scale=.1,
+                       shear=10,
+                       perspective=0.0,
+                       border=(0, 0)):
+    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.9, 1.1), shear=(-10, 10))
+    # targets = [cls, xyxy]
+
+    height = im.shape[0] + border[0] * 2  # shape(h,w,c)
+    width = im.shape[1] + border[1] * 2
+
+    # Center
+    C = np.eye(3)
+    C[0, 2] = -im.shape[1] / 2  # x translation (pixels)
+    C[1, 2] = -im.shape[0] / 2  # y translation (pixels)
+
+    # Perspective
+    P = np.eye(3)
+    P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
+    P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)
+
+    # Rotation and Scale
+    R = np.eye(3)
+    a = random.uniform(-degrees, degrees)
+    # a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
+    s = random.uniform(1 - scale, 1 + scale)
+    # s = 2 ** random.uniform(-scale, scale)
+    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
+
+    # Shear
+    S = np.eye(3)
+    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
+    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)
+
+    # Translation
+    T = np.eye(3)
+    T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x translation (pixels)
+    T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y translation (pixels)
+
+    # Combined rotation matrix
+    M = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT
+    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
+        if perspective:
+            im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114))
+        else:  # affine
+            im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
+
+    # Visualize
+    # import matplotlib.pyplot as plt
+    # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
+    # ax[0].imshow(im[:, :, ::-1])  # base
+    # ax[1].imshow(im2[:, :, ::-1])  # warped
+
+    # Transform label coordinates
+    n = len(targets)
+    if n:
+        use_segments = any(x.any() for x in segments) and len(segments) == n
+        new = np.zeros((n, 4))
+        if use_segments:  # warp segments
+            segments = resample_segments(segments)  # upsample
+            for i, segment in enumerate(segments):
+                xy = np.ones((len(segment), 3))
+                xy[:, :2] = segment
+                xy = xy @ M.T  # transform
+                xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]  # perspective rescale or affine
+
+                # clip
+                new[i] = segment2box(xy, width, height)
+
+        else:  # warp boxes
+            xy = np.ones((n * 4, 3))
+            xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
+            xy = xy @ M.T  # transform
+            xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8)  # perspective rescale or affine
+
+            # create new boxes
+            x = xy[:, [0, 2, 4, 6]]
+            y = xy[:, [1, 3, 5, 7]]
+            new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
+
+            # clip
+            new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
+            new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)
+
+        # filter candidates
+        i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
+        targets = targets[i]
+        targets[:, 1:5] = new[i]
+
+    return im, targets
+
+
+def copy_paste(im, labels, segments, p=0.5):
+    # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
+    n = len(segments)
+    if p and n:
+        h, w, c = im.shape  # height, width, channels
+        im_new = np.zeros(im.shape, np.uint8)
+        for j in random.sample(range(n), k=round(p * n)):
+            l, s = labels[j], segments[j]
+            box = w - l[3], l[2], w - l[1], l[4]
+            ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
+            if (ioa < 0.30).all():  # allow 30% obscuration of existing labels
+                labels = np.concatenate((labels, [[l[0], *box]]), 0)
+                segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1))
+                cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (1, 1, 1), cv2.FILLED)
+
+        result = cv2.flip(im, 1)  # augment segments (flip left-right)
+        i = cv2.flip(im_new, 1).astype(bool)
+        im[i] = result[i]  # cv2.imwrite('debug.jpg', im)  # debug
+
+    return im, labels, segments
+
+
+def cutout(im, labels, p=0.5):
+    # Applies image cutout augmentation https://arxiv.org/abs/1708.04552
+    if random.random() < p:
+        h, w = im.shape[:2]
+        scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16  # image size fraction
+        for s in scales:
+            mask_h = random.randint(1, int(h * s))  # create random masks
+            mask_w = random.randint(1, int(w * s))
+
+            # box
+            xmin = max(0, random.randint(0, w) - mask_w // 2)
+            ymin = max(0, random.randint(0, h) - mask_h // 2)
+            xmax = min(w, xmin + mask_w)
+            ymax = min(h, ymin + mask_h)
+
+            # apply random color mask
+            im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]
+
+            # return unobscured labels
+            if len(labels) and s > 0.03:
+                box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
+                ioa = bbox_ioa(box, xywhn2xyxy(labels[:, 1:5], w, h))  # intersection over area
+                labels = labels[ioa < 0.60]  # remove >60% obscured labels
+
+    return labels
+
+
+def mixup(im, labels, im2, labels2):
+    # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf
+    r = np.random.beta(32.0, 32.0)  # mixup ratio, alpha=beta=32.0
+    im = (im * r + im2 * (1 - r)).astype(np.uint8)
+    labels = np.concatenate((labels, labels2), 0)
+    return im, labels
+
+
+def box_candidates(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):  # box1(4,n), box2(4,n)
+    # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
+    w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
+    w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
+    ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps))  # aspect ratio
+    return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr)  # candidates
+
+
+def classify_albumentations(
+        augment=True,
+        size=224,
+        scale=(0.08, 1.0),
+        ratio=(0.75, 1.0 / 0.75),  # 0.75, 1.33
+        hflip=0.5,
+        vflip=0.0,
+        jitter=0.4,
+        mean=IMAGENET_MEAN,
+        std=IMAGENET_STD,
+        auto_aug=False):
+    # YOLOv5 classification Albumentations (optional, only used if package is installed)
+    prefix = colorstr('albumentations: ')
+    try:
+        import albumentations as A
+        from albumentations.pytorch import ToTensorV2
+        check_version(A.__version__, '1.0.3', hard=True)  # version requirement
+        if augment:  # Resize and crop
+            T = [A.RandomResizedCrop(height=size, width=size, scale=scale, ratio=ratio)]
+            if auto_aug:
+                # TODO: implement AugMix, AutoAug & RandAug in albumentation
+                LOGGER.info(f'{prefix}auto augmentations are currently not supported')
+            else:
+                if hflip > 0:
+                    T += [A.HorizontalFlip(p=hflip)]
+                if vflip > 0:
+                    T += [A.VerticalFlip(p=vflip)]
+                if jitter > 0:
+                    color_jitter = (float(jitter),) * 3  # repeat value for brightness, contrast, satuaration, 0 hue
+                    T += [A.ColorJitter(*color_jitter, 0)]
+        else:  # Use fixed crop for eval set (reproducibility)
+            T = [A.SmallestMaxSize(max_size=size), A.CenterCrop(height=size, width=size)]
+        T += [A.Normalize(mean=mean, std=std), ToTensorV2()]  # Normalize and convert to Tensor
+        LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p))
+        return A.Compose(T)
+
+    except ImportError:  # package not installed, skip
+        LOGGER.warning(f'{prefix}⚠️ not found, install with `pip install albumentations` (recommended)')
+    except Exception as e:
+        LOGGER.info(f'{prefix}{e}')
+
+
+def classify_transforms(size=224):
+    # Transforms to apply if albumentations not installed
+    assert isinstance(size, int), f'ERROR: classify_transforms size {size} must be integer, not (list, tuple)'
+    # T.Compose([T.ToTensor(), T.Resize(size), T.CenterCrop(size), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
+    return T.Compose([CenterCrop(size), ToTensor(), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
+
+
+class LetterBox:
+    # YOLOv5 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])
+    def __init__(self, size=(640, 640), auto=False, stride=32):
+        super().__init__()
+        self.h, self.w = (size, size) if isinstance(size, int) else size
+        self.auto = auto  # pass max size integer, automatically solve for short side using stride
+        self.stride = stride  # used with auto
+
+    def __call__(self, im):  # im = np.array HWC
+        imh, imw = im.shape[:2]
+        r = min(self.h / imh, self.w / imw)  # ratio of new/old
+        h, w = round(imh * r), round(imw * r)  # resized image
+        hs, ws = (math.ceil(x / self.stride) * self.stride for x in (h, w)) if self.auto else self.h, self.w
+        top, left = round((hs - h) / 2 - 0.1), round((ws - w) / 2 - 0.1)
+        im_out = np.full((self.h, self.w, 3), 114, dtype=im.dtype)
+        im_out[top:top + h, left:left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
+        return im_out
+
+
+class CenterCrop:
+    # YOLOv5 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()])
+    def __init__(self, size=640):
+        super().__init__()
+        self.h, self.w = (size, size) if isinstance(size, int) else size
+
+    def __call__(self, im):  # im = np.array HWC
+        imh, imw = im.shape[:2]
+        m = min(imh, imw)  # min dimension
+        top, left = (imh - m) // 2, (imw - m) // 2
+        return cv2.resize(im[top:top + m, left:left + m], (self.w, self.h), interpolation=cv2.INTER_LINEAR)
+
+
+class ToTensor:
+    # YOLOv5 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])
+    def __init__(self, half=False):
+        super().__init__()
+        self.half = half
+
+    def __call__(self, im):  # im = np.array HWC in BGR order
+        im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1])  # HWC to CHW -> BGR to RGB -> contiguous
+        im = torch.from_numpy(im)  # to torch
+        im = im.half() if self.half else im.float()  # uint8 to fp16/32
+        im /= 255.0  # 0-255 to 0.0-1.0
+        return im
diff --git a/utils/autoanchor.py b/utils/autoanchor.py
index 75b350da729c..bb5cf6e6965e 100644
--- a/utils/autoanchor.py
+++ b/utils/autoanchor.py
@@ -1,29 +1,34 @@
-# Auto-anchor utils
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+AutoAnchor utils
+"""
+
+import random
 
 import numpy as np
 import torch
 import yaml
-from scipy.cluster.vq import kmeans
 from tqdm import tqdm
 
-from utils.general import colorstr
+from utils import TryExcept
+from utils.general import LOGGER, TQDM_BAR_FORMAT, colorstr
+
+PREFIX = colorstr('AutoAnchor: ')
 
 
 def check_anchor_order(m):
     # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary
-    a = m.anchor_grid.prod(-1).view(-1)  # anchor area
+    a = m.anchors.prod(-1).mean(-1).view(-1)  # mean anchor area per output layer
     da = a[-1] - a[0]  # delta a
     ds = m.stride[-1] - m.stride[0]  # delta s
-    if da.sign() != ds.sign():  # same order
-        print('Reversing anchor order')
+    if da and (da.sign() != ds.sign()):  # same order
+        LOGGER.info(f'{PREFIX}Reversing anchor order')
         m.anchors[:] = m.anchors.flip(0)
-        m.anchor_grid[:] = m.anchor_grid.flip(0)
 
 
+@TryExcept(f'{PREFIX}ERROR')
 def check_anchors(dataset, model, thr=4.0, imgsz=640):
     # Check anchor fit to data, recompute if necessary
-    prefix = colorstr('autoanchor: ')
-    print(f'\n{prefix}Analyzing anchors... ', end='')
     m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1]  # Detect()
     shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True)
     scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1))  # augment scale
@@ -31,39 +36,39 @@ def check_anchors(dataset, model, thr=4.0, imgsz=640):
 
     def metric(k):  # compute metric
         r = wh[:, None] / k[None]
-        x = torch.min(r, 1. / r).min(2)[0]  # ratio metric
+        x = torch.min(r, 1 / r).min(2)[0]  # ratio metric
         best = x.max(1)[0]  # best_x
-        aat = (x > 1. / thr).float().sum(1).mean()  # anchors above threshold
-        bpr = (best > 1. / thr).float().mean()  # best possible recall
+        aat = (x > 1 / thr).float().sum(1).mean()  # anchors above threshold
+        bpr = (best > 1 / thr).float().mean()  # best possible recall
         return bpr, aat
 
-    anchors = m.anchor_grid.clone().cpu().view(-1, 2)  # current anchors
-    bpr, aat = metric(anchors)
-    print(f'anchors/target = {aat:.2f}, Best Possible Recall (BPR) = {bpr:.4f}', end='')
-    if bpr < 0.98:  # threshold to recompute
-        print('. Attempting to improve anchors, please wait...')
-        na = m.anchor_grid.numel() // 2  # number of anchors
-        try:
-            anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False)
-        except Exception as e:
-            print(f'{prefix}ERROR: {e}')
+    stride = m.stride.to(m.anchors.device).view(-1, 1, 1)  # model strides
+    anchors = m.anchors.clone() * stride  # current anchors
+    bpr, aat = metric(anchors.cpu().view(-1, 2))
+    s = f'\n{PREFIX}{aat:.2f} anchors/target, {bpr:.3f} Best Possible Recall (BPR). '
+    if bpr > 0.98:  # threshold to recompute
+        LOGGER.info(f'{s}Current anchors are a good fit to dataset ✅')
+    else:
+        LOGGER.info(f'{s}Anchors are a poor fit to dataset ⚠️, attempting to improve...')
+        na = m.anchors.numel() // 2  # number of anchors
+        anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False)
         new_bpr = metric(anchors)[0]
         if new_bpr > bpr:  # replace anchors
             anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors)
-            m.anchor_grid[:] = anchors.clone().view_as(m.anchor_grid)  # for inference
-            m.anchors[:] = anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1)  # loss
-            check_anchor_order(m)
-            print(f'{prefix}New anchors saved to model. Update model *.yaml to use these anchors in the future.')
+            m.anchors[:] = anchors.clone().view_as(m.anchors)
+            check_anchor_order(m)  # must be in pixel-space (not grid-space)
+            m.anchors /= stride
+            s = f'{PREFIX}Done ✅ (optional: update model *.yaml to use these anchors in the future)'
         else:
-            print(f'{prefix}Original anchors better than new anchors. Proceeding with original anchors.')
-    print('')  # newline
+            s = f'{PREFIX}Done ⚠️ (original anchors better than new anchors, proceeding with original anchors)'
+        LOGGER.info(s)
 
 
-def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True):
+def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True):
     """ Creates kmeans-evolved anchors from training dataset
 
         Arguments:
-            path: path to dataset *.yaml, or a loaded dataset
+            dataset: path to data.yaml, or a loaded dataset
             n: number of anchors
             img_size: image size used for training
             thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0
@@ -76,12 +81,14 @@ def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=10
         Usage:
             from utils.autoanchor import *; _ = kmean_anchors()
     """
-    thr = 1. / thr
-    prefix = colorstr('autoanchor: ')
+    from scipy.cluster.vq import kmeans
+
+    npr = np.random
+    thr = 1 / thr
 
     def metric(k, wh):  # compute metrics
         r = wh[:, None] / k[None]
-        x = torch.min(r, 1. / r).min(2)[0]  # ratio metric
+        x = torch.min(r, 1 / r).min(2)[0]  # ratio metric
         # x = wh_iou(wh, torch.tensor(k))  # iou metric
         return x, x.max(1)[0]  # x, best_x
 
@@ -89,24 +96,24 @@ def anchor_fitness(k):  # mutation fitness
         _, best = metric(torch.tensor(k, dtype=torch.float32), wh)
         return (best * (best > thr).float()).mean()  # fitness
 
-    def print_results(k):
+    def print_results(k, verbose=True):
         k = k[np.argsort(k.prod(1))]  # sort small to large
         x, best = metric(k, wh0)
         bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n  # best possible recall, anch > thr
-        print(f'{prefix}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr')
-        print(f'{prefix}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, '
-              f'past_thr={x[x > thr].mean():.3f}-mean: ', end='')
-        for i, x in enumerate(k):
-            print('%i,%i' % (round(x[0]), round(x[1])), end=',  ' if i < len(k) - 1 else '\n')  # use in *.cfg
+        s = f'{PREFIX}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr\n' \
+            f'{PREFIX}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' \
+            f'past_thr={x[x > thr].mean():.3f}-mean: '
+        for x in k:
+            s += '%i,%i, ' % (round(x[0]), round(x[1]))
+        if verbose:
+            LOGGER.info(s[:-2])
         return k
 
-    if isinstance(path, str):  # *.yaml file
-        with open(path) as f:
+    if isinstance(dataset, str):  # *.yaml file
+        with open(dataset, errors='ignore') as f:
             data_dict = yaml.safe_load(f)  # model dict
-        from utils.datasets import LoadImagesAndLabels
+        from utils.dataloaders import LoadImagesAndLabels
         dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True)
-    else:
-        dataset = path  # dataset
 
     # Get label wh
     shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
@@ -115,19 +122,22 @@ def print_results(k):
     # Filter
     i = (wh0 < 3.0).any(1).sum()
     if i:
-        print(f'{prefix}WARNING: Extremely small objects found. {i} of {len(wh0)} labels are < 3 pixels in size.')
-    wh = wh0[(wh0 >= 2.0).any(1)]  # filter > 2 pixels
-    # wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1)  # multiply by random scale 0-1
-
-    # Kmeans calculation
-    print(f'{prefix}Running kmeans for {n} anchors on {len(wh)} points...')
-    s = wh.std(0)  # sigmas for whitening
-    k, dist = kmeans(wh / s, n, iter=30)  # points, mean distance
-    assert len(k) == n, print(f'{prefix}ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}')
-    k *= s
-    wh = torch.tensor(wh, dtype=torch.float32)  # filtered
-    wh0 = torch.tensor(wh0, dtype=torch.float32)  # unfiltered
-    k = print_results(k)
+        LOGGER.info(f'{PREFIX}WARNING ⚠️ Extremely small objects found: {i} of {len(wh0)} labels are <3 pixels in size')
+    wh = wh0[(wh0 >= 2.0).any(1)].astype(np.float32)  # filter > 2 pixels
+    # wh = wh * (npr.rand(wh.shape[0], 1) * 0.9 + 0.1)  # multiply by random scale 0-1
+
+    # Kmeans init
+    try:
+        LOGGER.info(f'{PREFIX}Running kmeans for {n} anchors on {len(wh)} points...')
+        assert n <= len(wh)  # apply overdetermined constraint
+        s = wh.std(0)  # sigmas for whitening
+        k = kmeans(wh / s, n, iter=30)[0] * s  # points
+        assert n == len(k)  # kmeans may return fewer points than requested if wh is insufficient or too similar
+    except Exception:
+        LOGGER.warning(f'{PREFIX}WARNING ⚠️ switching strategies from kmeans to random init')
+        k = np.sort(npr.rand(n * 2)).reshape(n, 2) * img_size  # random init
+    wh, wh0 = (torch.tensor(x, dtype=torch.float32) for x in (wh, wh0))
+    k = print_results(k, verbose=False)
 
     # Plot
     # k, d = [None] * 20, [None] * 20
@@ -142,19 +152,18 @@ def print_results(k):
     # fig.savefig('wh.png', dpi=200)
 
     # Evolve
-    npr = np.random
     f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1  # fitness, generations, mutation prob, sigma
-    pbar = tqdm(range(gen), desc=f'{prefix}Evolving anchors with Genetic Algorithm:')  # progress bar
+    pbar = tqdm(range(gen), bar_format=TQDM_BAR_FORMAT)  # progress bar
     for _ in pbar:
         v = np.ones(sh)
         while (v == 1).all():  # mutate until a change occurs (prevent duplicates)
-            v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)
+            v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)
         kg = (k.copy() * v).clip(min=2.0)
         fg = anchor_fitness(kg)
         if fg > f:
             f, k = fg, kg.copy()
-            pbar.desc = f'{prefix}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}'
+            pbar.desc = f'{PREFIX}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}'
             if verbose:
-                print_results(k)
+                print_results(k, verbose)
 
-    return print_results(k)
+    return print_results(k).astype(np.float32)
diff --git a/utils/autobatch.py b/utils/autobatch.py
new file mode 100644
index 000000000000..bdeb91c3d2bd
--- /dev/null
+++ b/utils/autobatch.py
@@ -0,0 +1,72 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Auto-batch utils
+"""
+
+from copy import deepcopy
+
+import numpy as np
+import torch
+
+from utils.general import LOGGER, colorstr
+from utils.torch_utils import profile
+
+
+def check_train_batch_size(model, imgsz=640, amp=True):
+    # Check YOLOv5 training batch size
+    with torch.cuda.amp.autocast(amp):
+        return autobatch(deepcopy(model).train(), imgsz)  # compute optimal batch size
+
+
+def autobatch(model, imgsz=640, fraction=0.8, batch_size=16):
+    # Automatically estimate best YOLOv5 batch size to use `fraction` of available CUDA memory
+    # Usage:
+    #     import torch
+    #     from utils.autobatch import autobatch
+    #     model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)
+    #     print(autobatch(model))
+
+    # Check device
+    prefix = colorstr('AutoBatch: ')
+    LOGGER.info(f'{prefix}Computing optimal batch size for --imgsz {imgsz}')
+    device = next(model.parameters()).device  # get model device
+    if device.type == 'cpu':
+        LOGGER.info(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}')
+        return batch_size
+    if torch.backends.cudnn.benchmark:
+        LOGGER.info(f'{prefix} ⚠️ Requires torch.backends.cudnn.benchmark=False, using default batch-size {batch_size}')
+        return batch_size
+
+    # Inspect CUDA memory
+    gb = 1 << 30  # bytes to GiB (1024 ** 3)
+    d = str(device).upper()  # 'CUDA:0'
+    properties = torch.cuda.get_device_properties(device)  # device properties
+    t = properties.total_memory / gb  # GiB total
+    r = torch.cuda.memory_reserved(device) / gb  # GiB reserved
+    a = torch.cuda.memory_allocated(device) / gb  # GiB allocated
+    f = t - (r + a)  # GiB free
+    LOGGER.info(f'{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free')
+
+    # Profile batch sizes
+    batch_sizes = [1, 2, 4, 8, 16]
+    try:
+        img = [torch.empty(b, 3, imgsz, imgsz) for b in batch_sizes]
+        results = profile(img, model, n=3, device=device)
+    except Exception as e:
+        LOGGER.warning(f'{prefix}{e}')
+
+    # Fit a solution
+    y = [x[2] for x in results if x]  # memory [2]
+    p = np.polyfit(batch_sizes[:len(y)], y, deg=1)  # first degree polynomial fit
+    b = int((f * fraction - p[1]) / p[0])  # y intercept (optimal batch size)
+    if None in results:  # some sizes failed
+        i = results.index(None)  # first fail index
+        if b >= batch_sizes[i]:  # y intercept above failure point
+            b = batch_sizes[max(i - 1, 0)]  # select prior safe point
+    if b < 1 or b > 1024:  # b outside of safe range
+        b = batch_size
+        LOGGER.warning(f'{prefix}WARNING ⚠️ CUDA anomaly detected, recommend restart environment and retry command.')
+
+    fraction = (np.polyval(p, b) + r + a) / t  # actual fraction predicted
+    LOGGER.info(f'{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%) ✅')
+    return b
diff --git a/utils/aws/resume.py b/utils/aws/resume.py
index 4b0d4246b594..b21731c979a1 100644
--- a/utils/aws/resume.py
+++ b/utils/aws/resume.py
@@ -8,7 +8,10 @@
 import torch
 import yaml
 
-sys.path.append('./')  # to run '$ python *.py' files in subdirectories
+FILE = Path(__file__).resolve()
+ROOT = FILE.parents[2]  # YOLOv5 root directory
+if str(ROOT) not in sys.path:
+    sys.path.append(str(ROOT))  # add ROOT to PATH
 
 port = 0  # --master_port
 path = Path('').resolve()
@@ -18,7 +21,7 @@
         continue
 
     # Load opt.yaml
-    with open(last.parent.parent / 'opt.yaml') as f:
+    with open(last.parent.parent / 'opt.yaml', errors='ignore') as f:
         opt = yaml.safe_load(f)
 
     # Get device count
@@ -28,7 +31,7 @@
 
     if ddp:  # multi-GPU
         port += 1
-        cmd = f'python -m torch.distributed.launch --nproc_per_node {nd} --master_port {port} train.py --resume {last}'
+        cmd = f'python -m torch.distributed.run --nproc_per_node {nd} --master_port {port} train.py --resume {last}'
     else:  # single-GPU
         cmd = f'python train.py --resume {last}'
 
diff --git a/utils/aws/userdata.sh b/utils/aws/userdata.sh
index 890606b76a06..5fc1332ac1b0 100644
--- a/utils/aws/userdata.sh
+++ b/utils/aws/userdata.sh
@@ -7,9 +7,9 @@
 cd home/ubuntu
 if [ ! -d yolov5 ]; then
   echo "Running first-time script." # install dependencies, download COCO, pull Docker
-  git clone https://github.com/ultralytics/yolov5 && sudo chmod -R 777 yolov5
+  git clone https://github.com/ultralytics/yolov5 -b master && sudo chmod -R 777 yolov5
   cd yolov5
-  bash data/scripts/get_coco.sh && echo "Data done." &
+  bash data/scripts/get_coco.sh && echo "COCO done." &
   sudo docker pull ultralytics/yolov5:latest && echo "Docker done." &
   python -m pip install --upgrade pip && pip install -r requirements.txt && python detect.py && echo "Requirements done." &
   wait && echo "All tasks done." # finish background tasks
diff --git a/utils/callbacks.py b/utils/callbacks.py
new file mode 100644
index 000000000000..166d8938322d
--- /dev/null
+++ b/utils/callbacks.py
@@ -0,0 +1,76 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Callback utils
+"""
+
+import threading
+
+
+class Callbacks:
+    """"
+    Handles all registered callbacks for YOLOv5 Hooks
+    """
+
+    def __init__(self):
+        # Define the available callbacks
+        self._callbacks = {
+            'on_pretrain_routine_start': [],
+            'on_pretrain_routine_end': [],
+            'on_train_start': [],
+            'on_train_epoch_start': [],
+            'on_train_batch_start': [],
+            'optimizer_step': [],
+            'on_before_zero_grad': [],
+            'on_train_batch_end': [],
+            'on_train_epoch_end': [],
+            'on_val_start': [],
+            'on_val_batch_start': [],
+            'on_val_image_end': [],
+            'on_val_batch_end': [],
+            'on_val_end': [],
+            'on_fit_epoch_end': [],  # fit = train + val
+            'on_model_save': [],
+            'on_train_end': [],
+            'on_params_update': [],
+            'teardown': [],}
+        self.stop_training = False  # set True to interrupt training
+
+    def register_action(self, hook, name='', callback=None):
+        """
+        Register a new action to a callback hook
+
+        Args:
+            hook: The callback hook name to register the action to
+            name: The name of the action for later reference
+            callback: The callback to fire
+        """
+        assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}"
+        assert callable(callback), f"callback '{callback}' is not callable"
+        self._callbacks[hook].append({'name': name, 'callback': callback})
+
+    def get_registered_actions(self, hook=None):
+        """"
+        Returns all the registered actions by callback hook
+
+        Args:
+            hook: The name of the hook to check, defaults to all
+        """
+        return self._callbacks[hook] if hook else self._callbacks
+
+    def run(self, hook, *args, thread=False, **kwargs):
+        """
+        Loop through the registered actions and fire all callbacks on main thread
+
+        Args:
+            hook: The name of the hook to check, defaults to all
+            args: Arguments to receive from YOLOv5
+            thread: (boolean) Run callbacks in daemon thread
+            kwargs: Keyword Arguments to receive from YOLOv5
+        """
+
+        assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}"
+        for logger in self._callbacks[hook]:
+            if thread:
+                threading.Thread(target=logger['callback'], args=args, kwargs=kwargs, daemon=True).start()
+            else:
+                logger['callback'](*args, **kwargs)
diff --git a/utils/dataloaders.py b/utils/dataloaders.py
new file mode 100644
index 000000000000..28d5b7974cf8
--- /dev/null
+++ b/utils/dataloaders.py
@@ -0,0 +1,1222 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Dataloaders and dataset utils
+"""
+
+import contextlib
+import glob
+import hashlib
+import json
+import math
+import os
+import random
+import shutil
+import time
+from itertools import repeat
+from multiprocessing.pool import Pool, ThreadPool
+from pathlib import Path
+from threading import Thread
+from urllib.parse import urlparse
+
+import numpy as np
+import psutil
+import torch
+import torch.nn.functional as F
+import torchvision
+import yaml
+from PIL import ExifTags, Image, ImageOps
+from torch.utils.data import DataLoader, Dataset, dataloader, distributed
+from tqdm import tqdm
+
+from utils.augmentations import (Albumentations, augment_hsv, classify_albumentations, classify_transforms, copy_paste,
+                                 letterbox, mixup, random_perspective)
+from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, TQDM_BAR_FORMAT, check_dataset, check_requirements,
+                           check_yaml, clean_str, cv2, is_colab, is_kaggle, segments2boxes, unzip_file, xyn2xy,
+                           xywh2xyxy, xywhn2xyxy, xyxy2xywhn)
+from utils.torch_utils import torch_distributed_zero_first
+
+# Parameters
+HELP_URL = 'See https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
+IMG_FORMATS = 'bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp', 'pfm'  # include image suffixes
+VID_FORMATS = 'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ts', 'wmv'  # include video suffixes
+LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1))  # https://pytorch.org/docs/stable/elastic/run.html
+RANK = int(os.getenv('RANK', -1))
+PIN_MEMORY = str(os.getenv('PIN_MEMORY', True)).lower() == 'true'  # global pin_memory for dataloaders
+
+# Get orientation exif tag
+for orientation in ExifTags.TAGS.keys():
+    if ExifTags.TAGS[orientation] == 'Orientation':
+        break
+
+
+def get_hash(paths):
+    # Returns a single hash value of a list of paths (files or dirs)
+    size = sum(os.path.getsize(p) for p in paths if os.path.exists(p))  # sizes
+    h = hashlib.sha256(str(size).encode())  # hash sizes
+    h.update(''.join(paths).encode())  # hash paths
+    return h.hexdigest()  # return hash
+
+
+def exif_size(img):
+    # Returns exif-corrected PIL size
+    s = img.size  # (width, height)
+    with contextlib.suppress(Exception):
+        rotation = dict(img._getexif().items())[orientation]
+        if rotation in [6, 8]:  # rotation 270 or 90
+            s = (s[1], s[0])
+    return s
+
+
+def exif_transpose(image):
+    """
+    Transpose a PIL image accordingly if it has an EXIF Orientation tag.
+    Inplace version of https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py exif_transpose()
+
+    :param image: The image to transpose.
+    :return: An image.
+    """
+    exif = image.getexif()
+    orientation = exif.get(0x0112, 1)  # default 1
+    if orientation > 1:
+        method = {
+            2: Image.FLIP_LEFT_RIGHT,
+            3: Image.ROTATE_180,
+            4: Image.FLIP_TOP_BOTTOM,
+            5: Image.TRANSPOSE,
+            6: Image.ROTATE_270,
+            7: Image.TRANSVERSE,
+            8: Image.ROTATE_90}.get(orientation)
+        if method is not None:
+            image = image.transpose(method)
+            del exif[0x0112]
+            image.info['exif'] = exif.tobytes()
+    return image
+
+
+def seed_worker(worker_id):
+    # Set dataloader worker seed https://pytorch.org/docs/stable/notes/randomness.html#dataloader
+    worker_seed = torch.initial_seed() % 2 ** 32
+    np.random.seed(worker_seed)
+    random.seed(worker_seed)
+
+
+def create_dataloader(path,
+                      imgsz,
+                      batch_size,
+                      stride,
+                      single_cls=False,
+                      hyp=None,
+                      augment=False,
+                      cache=False,
+                      pad=0.0,
+                      rect=False,
+                      rank=-1,
+                      workers=8,
+                      image_weights=False,
+                      quad=False,
+                      prefix='',
+                      shuffle=False,
+                      seed=0):
+    if rect and shuffle:
+        LOGGER.warning('WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False')
+        shuffle = False
+    with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
+        dataset = LoadImagesAndLabels(
+            path,
+            imgsz,
+            batch_size,
+            augment=augment,  # augmentation
+            hyp=hyp,  # hyperparameters
+            rect=rect,  # rectangular batches
+            cache_images=cache,
+            single_cls=single_cls,
+            stride=int(stride),
+            pad=pad,
+            image_weights=image_weights,
+            prefix=prefix)
+
+    batch_size = min(batch_size, len(dataset))
+    nd = torch.cuda.device_count()  # number of CUDA devices
+    nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers])  # number of workers
+    sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
+    loader = DataLoader if image_weights else InfiniteDataLoader  # only DataLoader allows for attribute updates
+    generator = torch.Generator()
+    generator.manual_seed(6148914691236517205 + seed + RANK)
+    return loader(dataset,
+                  batch_size=batch_size,
+                  shuffle=shuffle and sampler is None,
+                  num_workers=nw,
+                  sampler=sampler,
+                  pin_memory=PIN_MEMORY,
+                  collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn,
+                  worker_init_fn=seed_worker,
+                  generator=generator), dataset
+
+
+class InfiniteDataLoader(dataloader.DataLoader):
+    """ Dataloader that reuses workers
+
+    Uses same syntax as vanilla DataLoader
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler))
+        self.iterator = super().__iter__()
+
+    def __len__(self):
+        return len(self.batch_sampler.sampler)
+
+    def __iter__(self):
+        for _ in range(len(self)):
+            yield next(self.iterator)
+
+
+class _RepeatSampler:
+    """ Sampler that repeats forever
+
+    Args:
+        sampler (Sampler)
+    """
+
+    def __init__(self, sampler):
+        self.sampler = sampler
+
+    def __iter__(self):
+        while True:
+            yield from iter(self.sampler)
+
+
+class LoadScreenshots:
+    # YOLOv5 screenshot dataloader, i.e. `python detect.py --source "screen 0 100 100 512 256"`
+    def __init__(self, source, img_size=640, stride=32, auto=True, transforms=None):
+        # source = [screen_number left top width height] (pixels)
+        check_requirements('mss')
+        import mss
+
+        source, *params = source.split()
+        self.screen, left, top, width, height = 0, None, None, None, None  # default to full screen 0
+        if len(params) == 1:
+            self.screen = int(params[0])
+        elif len(params) == 4:
+            left, top, width, height = (int(x) for x in params)
+        elif len(params) == 5:
+            self.screen, left, top, width, height = (int(x) for x in params)
+        self.img_size = img_size
+        self.stride = stride
+        self.transforms = transforms
+        self.auto = auto
+        self.mode = 'stream'
+        self.frame = 0
+        self.sct = mss.mss()
+
+        # Parse monitor shape
+        monitor = self.sct.monitors[self.screen]
+        self.top = monitor['top'] if top is None else (monitor['top'] + top)
+        self.left = monitor['left'] if left is None else (monitor['left'] + left)
+        self.width = width or monitor['width']
+        self.height = height or monitor['height']
+        self.monitor = {'left': self.left, 'top': self.top, 'width': self.width, 'height': self.height}
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        # mss screen capture: get raw pixels from the screen as np array
+        im0 = np.array(self.sct.grab(self.monitor))[:, :, :3]  # [:, :, :3] BGRA to BGR
+        s = f'screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: '
+
+        if self.transforms:
+            im = self.transforms(im0)  # transforms
+        else:
+            im = letterbox(im0, self.img_size, stride=self.stride, auto=self.auto)[0]  # padded resize
+            im = im.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
+            im = np.ascontiguousarray(im)  # contiguous
+        self.frame += 1
+        return str(self.screen), im, im0, None, s  # screen, img, original img, im0s, s
+
+
+class LoadImages:
+    # YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4`
+    def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
+        if isinstance(path, str) and Path(path).suffix == '.txt':  # *.txt file with img/vid/dir on each line
+            path = Path(path).read_text().rsplit()
+        files = []
+        for p in sorted(path) if isinstance(path, (list, tuple)) else [path]:
+            p = str(Path(p).resolve())
+            if '*' in p:
+                files.extend(sorted(glob.glob(p, recursive=True)))  # glob
+            elif os.path.isdir(p):
+                files.extend(sorted(glob.glob(os.path.join(p, '*.*'))))  # dir
+            elif os.path.isfile(p):
+                files.append(p)  # files
+            else:
+                raise FileNotFoundError(f'{p} does not exist')
+
+        images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS]
+        videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS]
+        ni, nv = len(images), len(videos)
+
+        self.img_size = img_size
+        self.stride = stride
+        self.files = images + videos
+        self.nf = ni + nv  # number of files
+        self.video_flag = [False] * ni + [True] * nv
+        self.mode = 'image'
+        self.auto = auto
+        self.transforms = transforms  # optional
+        self.vid_stride = vid_stride  # video frame-rate stride
+        if any(videos):
+            self._new_video(videos[0])  # new video
+        else:
+            self.cap = None
+        assert self.nf > 0, f'No images or videos found in {p}. ' \
+                            f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}'
+
+    def __iter__(self):
+        self.count = 0
+        return self
+
+    def __next__(self):
+        if self.count == self.nf:
+            raise StopIteration
+        path = self.files[self.count]
+
+        if self.video_flag[self.count]:
+            # Read video
+            self.mode = 'video'
+            for _ in range(self.vid_stride):
+                self.cap.grab()
+            ret_val, im0 = self.cap.retrieve()
+            while not ret_val:
+                self.count += 1
+                self.cap.release()
+                if self.count == self.nf:  # last video
+                    raise StopIteration
+                path = self.files[self.count]
+                self._new_video(path)
+                ret_val, im0 = self.cap.read()
+
+            self.frame += 1
+            # im0 = self._cv2_rotate(im0)  # for use if cv2 autorotation is False
+            s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: '
+
+        else:
+            # Read image
+            self.count += 1
+            im0 = cv2.imread(path)  # BGR
+            assert im0 is not None, f'Image Not Found {path}'
+            s = f'image {self.count}/{self.nf} {path}: '
+
+        if self.transforms:
+            im = self.transforms(im0)  # transforms
+        else:
+            im = letterbox(im0, self.img_size, stride=self.stride, auto=self.auto)[0]  # padded resize
+            im = im.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
+            im = np.ascontiguousarray(im)  # contiguous
+
+        return path, im, im0, self.cap, s
+
+    def _new_video(self, path):
+        # Create a new video capture object
+        self.frame = 0
+        self.cap = cv2.VideoCapture(path)
+        self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride)
+        self.orientation = int(self.cap.get(cv2.CAP_PROP_ORIENTATION_META))  # rotation degrees
+        # self.cap.set(cv2.CAP_PROP_ORIENTATION_AUTO, 0)  # disable https://github.com/ultralytics/yolov5/issues/8493
+
+    def _cv2_rotate(self, im):
+        # Rotate a cv2 video manually
+        if self.orientation == 0:
+            return cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE)
+        elif self.orientation == 180:
+            return cv2.rotate(im, cv2.ROTATE_90_COUNTERCLOCKWISE)
+        elif self.orientation == 90:
+            return cv2.rotate(im, cv2.ROTATE_180)
+        return im
+
+    def __len__(self):
+        return self.nf  # number of files
+
+
+class LoadStreams:
+    # YOLOv5 streamloader, i.e. `python detect.py --source 'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP streams`
+    def __init__(self, sources='file.streams', img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
+        torch.backends.cudnn.benchmark = True  # faster for fixed-size inference
+        self.mode = 'stream'
+        self.img_size = img_size
+        self.stride = stride
+        self.vid_stride = vid_stride  # video frame-rate stride
+        sources = Path(sources).read_text().rsplit() if os.path.isfile(sources) else [sources]
+        n = len(sources)
+        self.sources = [clean_str(x) for x in sources]  # clean source names for later
+        self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [0] * n, [None] * n
+        for i, s in enumerate(sources):  # index, source
+            # Start thread to read frames from video stream
+            st = f'{i + 1}/{n}: {s}... '
+            if urlparse(s).hostname in ('www.youtube.com', 'youtube.com', 'youtu.be'):  # if source is YouTube video
+                # YouTube format i.e. 'https://www.youtube.com/watch?v=Zgi9g1ksQHc' or 'https://youtu.be/Zgi9g1ksQHc'
+                check_requirements(('pafy', 'youtube_dl==2020.12.2'))
+                import pafy
+                s = pafy.new(s).getbest(preftype='mp4').url  # YouTube URL
+            s = eval(s) if s.isnumeric() else s  # i.e. s = '0' local webcam
+            if s == 0:
+                assert not is_colab(), '--source 0 webcam unsupported on Colab. Rerun command in a local environment.'
+                assert not is_kaggle(), '--source 0 webcam unsupported on Kaggle. Rerun command in a local environment.'
+            cap = cv2.VideoCapture(s)
+            assert cap.isOpened(), f'{st}Failed to open {s}'
+            w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+            h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            fps = cap.get(cv2.CAP_PROP_FPS)  # warning: may return 0 or nan
+            self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf')  # infinite stream fallback
+            self.fps[i] = max((fps if math.isfinite(fps) else 0) % 100, 0) or 30  # 30 FPS fallback
+
+            _, self.imgs[i] = cap.read()  # guarantee first frame
+            self.threads[i] = Thread(target=self.update, args=([i, cap, s]), daemon=True)
+            LOGGER.info(f'{st} Success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)')
+            self.threads[i].start()
+        LOGGER.info('')  # newline
+
+        # check for common shapes
+        s = np.stack([letterbox(x, img_size, stride=stride, auto=auto)[0].shape for x in self.imgs])
+        self.rect = np.unique(s, axis=0).shape[0] == 1  # rect inference if all shapes equal
+        self.auto = auto and self.rect
+        self.transforms = transforms  # optional
+        if not self.rect:
+            LOGGER.warning('WARNING ⚠️ Stream shapes differ. For optimal performance supply similarly-shaped streams.')
+
+    def update(self, i, cap, stream):
+        # Read stream `i` frames in daemon thread
+        n, f = 0, self.frames[i]  # frame number, frame array
+        while cap.isOpened() and n < f:
+            n += 1
+            cap.grab()  # .read() = .grab() followed by .retrieve()
+            if n % self.vid_stride == 0:
+                success, im = cap.retrieve()
+                if success:
+                    self.imgs[i] = im
+                else:
+                    LOGGER.warning('WARNING ⚠️ Video stream unresponsive, please check your IP camera connection.')
+                    self.imgs[i] = np.zeros_like(self.imgs[i])
+                    cap.open(stream)  # re-open stream if signal was lost
+            time.sleep(0.0)  # wait time
+
+    def __iter__(self):
+        self.count = -1
+        return self
+
+    def __next__(self):
+        self.count += 1
+        if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord('q'):  # q to quit
+            cv2.destroyAllWindows()
+            raise StopIteration
+
+        im0 = self.imgs.copy()
+        if self.transforms:
+            im = np.stack([self.transforms(x) for x in im0])  # transforms
+        else:
+            im = np.stack([letterbox(x, self.img_size, stride=self.stride, auto=self.auto)[0] for x in im0])  # resize
+            im = im[..., ::-1].transpose((0, 3, 1, 2))  # BGR to RGB, BHWC to BCHW
+            im = np.ascontiguousarray(im)  # contiguous
+
+        return self.sources, im, im0, None, ''
+
+    def __len__(self):
+        return len(self.sources)  # 1E12 frames = 32 streams at 30 FPS for 30 years
+
+
+def img2label_paths(img_paths):
+    # Define label paths as a function of image paths
+    sa, sb = f'{os.sep}images{os.sep}', f'{os.sep}labels{os.sep}'  # /images/, /labels/ substrings
+    return [sb.join(x.rsplit(sa, 1)).rsplit('.', 1)[0] + '.txt' for x in img_paths]
+
+
+class LoadImagesAndLabels(Dataset):
+    # YOLOv5 train_loader/val_loader, loads images and labels for training and validation
+    cache_version = 0.6  # dataset labels *.cache version
+    rand_interp_methods = [cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4]
+
+    def __init__(self,
+                 path,
+                 img_size=640,
+                 batch_size=16,
+                 augment=False,
+                 hyp=None,
+                 rect=False,
+                 image_weights=False,
+                 cache_images=False,
+                 single_cls=False,
+                 stride=32,
+                 pad=0.0,
+                 min_items=0,
+                 prefix=''):
+        self.img_size = img_size
+        self.augment = augment
+        self.hyp = hyp
+        self.image_weights = image_weights
+        self.rect = False if image_weights else rect
+        self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)
+        self.mosaic_border = [-img_size // 2, -img_size // 2]
+        self.stride = stride
+        self.path = path
+        self.albumentations = Albumentations(size=img_size) if augment else None
+
+        try:
+            f = []  # image files
+            for p in path if isinstance(path, list) else [path]:
+                p = Path(p)  # os-agnostic
+                if p.is_dir():  # dir
+                    f += glob.glob(str(p / '**' / '*.*'), recursive=True)
+                    # f = list(p.rglob('*.*'))  # pathlib
+                elif p.is_file():  # file
+                    with open(p) as t:
+                        t = t.read().strip().splitlines()
+                        parent = str(p.parent) + os.sep
+                        f += [x.replace('./', parent, 1) if x.startswith('./') else x for x in t]  # to global path
+                        # f += [p.parent / x.lstrip(os.sep) for x in t]  # to global path (pathlib)
+                else:
+                    raise FileNotFoundError(f'{prefix}{p} does not exist')
+            self.im_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS)
+            # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS])  # pathlib
+            assert self.im_files, f'{prefix}No images found'
+        except Exception as e:
+            raise Exception(f'{prefix}Error loading data from {path}: {e}\n{HELP_URL}') from e
+
+        # Check cache
+        self.label_files = img2label_paths(self.im_files)  # labels
+        cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')
+        try:
+            cache, exists = np.load(cache_path, allow_pickle=True).item(), True  # load dict
+            assert cache['version'] == self.cache_version  # matches current version
+            assert cache['hash'] == get_hash(self.label_files + self.im_files)  # identical hash
+        except Exception:
+            cache, exists = self.cache_labels(cache_path, prefix), False  # run cache ops
+
+        # Display cache
+        nf, nm, ne, nc, n = cache.pop('results')  # found, missing, empty, corrupt, total
+        if exists and LOCAL_RANK in {-1, 0}:
+            d = f'Scanning {cache_path}... {nf} images, {nm + ne} backgrounds, {nc} corrupt'
+            tqdm(None, desc=prefix + d, total=n, initial=n, bar_format=TQDM_BAR_FORMAT)  # display cache results
+            if cache['msgs']:
+                LOGGER.info('\n'.join(cache['msgs']))  # display warnings
+        assert nf > 0 or not augment, f'{prefix}No labels found in {cache_path}, can not start training. {HELP_URL}'
+
+        # Read cache
+        [cache.pop(k) for k in ('hash', 'version', 'msgs')]  # remove items
+        labels, shapes, self.segments = zip(*cache.values())
+        nl = len(np.concatenate(labels, 0))  # number of labels
+        assert nl > 0 or not augment, f'{prefix}All labels empty in {cache_path}, can not start training. {HELP_URL}'
+        self.labels = list(labels)
+        self.shapes = np.array(shapes)
+        self.im_files = list(cache.keys())  # update
+        self.label_files = img2label_paths(cache.keys())  # update
+
+        # Filter images
+        if min_items:
+            include = np.array([len(x) >= min_items for x in self.labels]).nonzero()[0].astype(int)
+            LOGGER.info(f'{prefix}{n - len(include)}/{n} images filtered from dataset')
+            self.im_files = [self.im_files[i] for i in include]
+            self.label_files = [self.label_files[i] for i in include]
+            self.labels = [self.labels[i] for i in include]
+            self.segments = [self.segments[i] for i in include]
+            self.shapes = self.shapes[include]  # wh
+
+        # Create indices
+        n = len(self.shapes)  # number of images
+        bi = np.floor(np.arange(n) / batch_size).astype(int)  # batch index
+        nb = bi[-1] + 1  # number of batches
+        self.batch = bi  # batch index of image
+        self.n = n
+        self.indices = range(n)
+
+        # Update labels
+        include_class = []  # filter labels to include only these classes (optional)
+        self.segments = list(self.segments)
+        include_class_array = np.array(include_class).reshape(1, -1)
+        for i, (label, segment) in enumerate(zip(self.labels, self.segments)):
+            if include_class:
+                j = (label[:, 0:1] == include_class_array).any(1)
+                self.labels[i] = label[j]
+                if segment:
+                    self.segments[i] = [segment[idx] for idx, elem in enumerate(j) if elem]
+            if single_cls:  # single-class training, merge all classes into 0
+                self.labels[i][:, 0] = 0
+
+        # Rectangular Training
+        if self.rect:
+            # Sort by aspect ratio
+            s = self.shapes  # wh
+            ar = s[:, 1] / s[:, 0]  # aspect ratio
+            irect = ar.argsort()
+            self.im_files = [self.im_files[i] for i in irect]
+            self.label_files = [self.label_files[i] for i in irect]
+            self.labels = [self.labels[i] for i in irect]
+            self.segments = [self.segments[i] for i in irect]
+            self.shapes = s[irect]  # wh
+            ar = ar[irect]
+
+            # Set training image shapes
+            shapes = [[1, 1]] * nb
+            for i in range(nb):
+                ari = ar[bi == i]
+                mini, maxi = ari.min(), ari.max()
+                if maxi < 1:
+                    shapes[i] = [maxi, 1]
+                elif mini > 1:
+                    shapes[i] = [1, 1 / mini]
+
+            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride
+
+        # Cache images into RAM/disk for faster training
+        if cache_images == 'ram' and not self.check_cache_ram(prefix=prefix):
+            cache_images = False
+        self.ims = [None] * n
+        self.npy_files = [Path(f).with_suffix('.npy') for f in self.im_files]
+        if cache_images:
+            b, gb = 0, 1 << 30  # bytes of cached images, bytes per gigabytes
+            self.im_hw0, self.im_hw = [None] * n, [None] * n
+            fcn = self.cache_images_to_disk if cache_images == 'disk' else self.load_image
+            results = ThreadPool(NUM_THREADS).imap(fcn, range(n))
+            pbar = tqdm(enumerate(results), total=n, bar_format=TQDM_BAR_FORMAT, disable=LOCAL_RANK > 0)
+            for i, x in pbar:
+                if cache_images == 'disk':
+                    b += self.npy_files[i].stat().st_size
+                else:  # 'ram'
+                    self.ims[i], self.im_hw0[i], self.im_hw[i] = x  # im, hw_orig, hw_resized = load_image(self, i)
+                    b += self.ims[i].nbytes
+                pbar.desc = f'{prefix}Caching images ({b / gb:.1f}GB {cache_images})'
+            pbar.close()
+
+    def check_cache_ram(self, safety_margin=0.1, prefix=''):
+        # Check image caching requirements vs available memory
+        b, gb = 0, 1 << 30  # bytes of cached images, bytes per gigabytes
+        n = min(self.n, 30)  # extrapolate from 30 random images
+        for _ in range(n):
+            im = cv2.imread(random.choice(self.im_files))  # sample image
+            ratio = self.img_size / max(im.shape[0], im.shape[1])  # max(h, w)  # ratio
+            b += im.nbytes * ratio ** 2
+        mem_required = b * self.n / n  # GB required to cache dataset into RAM
+        mem = psutil.virtual_memory()
+        cache = mem_required * (1 + safety_margin) < mem.available  # to cache or not to cache, that is the question
+        if not cache:
+            LOGGER.info(f'{prefix}{mem_required / gb:.1f}GB RAM required, '
+                        f'{mem.available / gb:.1f}/{mem.total / gb:.1f}GB available, '
+                        f"{'caching images ✅' if cache else 'not caching images ⚠️'}")
+        return cache
+
+    def cache_labels(self, path=Path('./labels.cache'), prefix=''):
+        # Cache dataset labels, check images and read shapes
+        x = {}  # dict
+        nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
+        desc = f'{prefix}Scanning {path.parent / path.stem}...'
+        with Pool(NUM_THREADS) as pool:
+            pbar = tqdm(pool.imap(verify_image_label, zip(self.im_files, self.label_files, repeat(prefix))),
+                        desc=desc,
+                        total=len(self.im_files),
+                        bar_format=TQDM_BAR_FORMAT)
+            for im_file, lb, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar:
+                nm += nm_f
+                nf += nf_f
+                ne += ne_f
+                nc += nc_f
+                if im_file:
+                    x[im_file] = [lb, shape, segments]
+                if msg:
+                    msgs.append(msg)
+                pbar.desc = f'{desc} {nf} images, {nm + ne} backgrounds, {nc} corrupt'
+
+        pbar.close()
+        if msgs:
+            LOGGER.info('\n'.join(msgs))
+        if nf == 0:
+            LOGGER.warning(f'{prefix}WARNING ⚠️ No labels found in {path}. {HELP_URL}')
+        x['hash'] = get_hash(self.label_files + self.im_files)
+        x['results'] = nf, nm, ne, nc, len(self.im_files)
+        x['msgs'] = msgs  # warnings
+        x['version'] = self.cache_version  # cache version
+        try:
+            np.save(path, x)  # save cache for next time
+            path.with_suffix('.cache.npy').rename(path)  # remove .npy suffix
+            LOGGER.info(f'{prefix}New cache created: {path}')
+        except Exception as e:
+            LOGGER.warning(f'{prefix}WARNING ⚠️ Cache directory {path.parent} is not writeable: {e}')  # not writeable
+        return x
+
+    def __len__(self):
+        return len(self.im_files)
+
+    # def __iter__(self):
+    #     self.count = -1
+    #     print('ran dataset iter')
+    #     #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
+    #     return self
+
+    def __getitem__(self, index):
+        index = self.indices[index]  # linear, shuffled, or image_weights
+
+        hyp = self.hyp
+        mosaic = self.mosaic and random.random() < hyp['mosaic']
+        if mosaic:
+            # Load mosaic
+            img, labels = self.load_mosaic(index)
+            shapes = None
+
+            # MixUp augmentation
+            if random.random() < hyp['mixup']:
+                img, labels = mixup(img, labels, *self.load_mosaic(random.randint(0, self.n - 1)))
+
+        else:
+            # Load image
+            img, (h0, w0), (h, w) = self.load_image(index)
+
+            # Letterbox
+            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
+            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
+            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling
+
+            labels = self.labels[index].copy()
+            if labels.size:  # normalized xywh to pixel xyxy format
+                labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])
+
+            if self.augment:
+                img, labels = random_perspective(img,
+                                                 labels,
+                                                 degrees=hyp['degrees'],
+                                                 translate=hyp['translate'],
+                                                 scale=hyp['scale'],
+                                                 shear=hyp['shear'],
+                                                 perspective=hyp['perspective'])
+
+        nl = len(labels)  # number of labels
+        if nl:
+            labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1E-3)
+
+        if self.augment:
+            # Albumentations
+            img, labels = self.albumentations(img, labels)
+            nl = len(labels)  # update after albumentations
+
+            # HSV color-space
+            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])
+
+            # Flip up-down
+            if random.random() < hyp['flipud']:
+                img = np.flipud(img)
+                if nl:
+                    labels[:, 2] = 1 - labels[:, 2]
+
+            # Flip left-right
+            if random.random() < hyp['fliplr']:
+                img = np.fliplr(img)
+                if nl:
+                    labels[:, 1] = 1 - labels[:, 1]
+
+            # Cutouts
+            # labels = cutout(img, labels, p=0.5)
+            # nl = len(labels)  # update after cutout
+
+        labels_out = torch.zeros((nl, 6))
+        if nl:
+            labels_out[:, 1:] = torch.from_numpy(labels)
+
+        # Convert
+        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
+        img = np.ascontiguousarray(img)
+
+        return torch.from_numpy(img), labels_out, self.im_files[index], shapes
+
+    def load_image(self, i):
+        # Loads 1 image from dataset index 'i', returns (im, original hw, resized hw)
+        im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i],
+        if im is None:  # not cached in RAM
+            if fn.exists():  # load npy
+                im = np.load(fn)
+            else:  # read image
+                im = cv2.imread(f)  # BGR
+                assert im is not None, f'Image Not Found {f}'
+            h0, w0 = im.shape[:2]  # orig hw
+            r = self.img_size / max(h0, w0)  # ratio
+            if r != 1:  # if sizes are not equal
+                interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA
+                im = cv2.resize(im, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp)
+            return im, (h0, w0), im.shape[:2]  # im, hw_original, hw_resized
+        return self.ims[i], self.im_hw0[i], self.im_hw[i]  # im, hw_original, hw_resized
+
+    def cache_images_to_disk(self, i):
+        # Saves an image as an *.npy file for faster loading
+        f = self.npy_files[i]
+        if not f.exists():
+            np.save(f.as_posix(), cv2.imread(self.im_files[i]))
+
+    def load_mosaic(self, index):
+        # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic
+        labels4, segments4 = [], []
+        s = self.img_size
+        yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border)  # mosaic center x, y
+        indices = [index] + random.choices(self.indices, k=3)  # 3 additional image indices
+        random.shuffle(indices)
+        for i, index in enumerate(indices):
+            # Load image
+            img, _, (h, w) = self.load_image(index)
+
+            # place img in img4
+            if i == 0:  # top left
+                img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
+                x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
+                x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
+            elif i == 1:  # top right
+                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
+                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
+            elif i == 2:  # bottom left
+                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
+                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
+            elif i == 3:  # bottom right
+                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
+                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
+
+            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
+            padw = x1a - x1b
+            padh = y1a - y1b
+
+            # Labels
+            labels, segments = self.labels[index].copy(), self.segments[index].copy()
+            if labels.size:
+                labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh)  # normalized xywh to pixel xyxy format
+                segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
+            labels4.append(labels)
+            segments4.extend(segments)
+
+        # Concat/clip labels
+        labels4 = np.concatenate(labels4, 0)
+        for x in (labels4[:, 1:], *segments4):
+            np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
+        # img4, labels4 = replicate(img4, labels4)  # replicate
+
+        # Augment
+        img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp['copy_paste'])
+        img4, labels4 = random_perspective(img4,
+                                           labels4,
+                                           segments4,
+                                           degrees=self.hyp['degrees'],
+                                           translate=self.hyp['translate'],
+                                           scale=self.hyp['scale'],
+                                           shear=self.hyp['shear'],
+                                           perspective=self.hyp['perspective'],
+                                           border=self.mosaic_border)  # border to remove
+
+        return img4, labels4
+
+    def load_mosaic9(self, index):
+        # YOLOv5 9-mosaic loader. Loads 1 image + 8 random images into a 9-image mosaic
+        labels9, segments9 = [], []
+        s = self.img_size
+        indices = [index] + random.choices(self.indices, k=8)  # 8 additional image indices
+        random.shuffle(indices)
+        hp, wp = -1, -1  # height, width previous
+        for i, index in enumerate(indices):
+            # Load image
+            img, _, (h, w) = self.load_image(index)
+
+            # place img in img9
+            if i == 0:  # center
+                img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
+                h0, w0 = h, w
+                c = s, s, s + w, s + h  # xmin, ymin, xmax, ymax (base) coordinates
+            elif i == 1:  # top
+                c = s, s - h, s + w, s
+            elif i == 2:  # top right
+                c = s + wp, s - h, s + wp + w, s
+            elif i == 3:  # right
+                c = s + w0, s, s + w0 + w, s + h
+            elif i == 4:  # bottom right
+                c = s + w0, s + hp, s + w0 + w, s + hp + h
+            elif i == 5:  # bottom
+                c = s + w0 - w, s + h0, s + w0, s + h0 + h
+            elif i == 6:  # bottom left
+                c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h
+            elif i == 7:  # left
+                c = s - w, s + h0 - h, s, s + h0
+            elif i == 8:  # top left
+                c = s - w, s + h0 - hp - h, s, s + h0 - hp
+
+            padx, pady = c[:2]
+            x1, y1, x2, y2 = (max(x, 0) for x in c)  # allocate coords
+
+            # Labels
+            labels, segments = self.labels[index].copy(), self.segments[index].copy()
+            if labels.size:
+                labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady)  # normalized xywh to pixel xyxy format
+                segments = [xyn2xy(x, w, h, padx, pady) for x in segments]
+            labels9.append(labels)
+            segments9.extend(segments)
+
+            # Image
+            img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:]  # img9[ymin:ymax, xmin:xmax]
+            hp, wp = h, w  # height, width previous
+
+        # Offset
+        yc, xc = (int(random.uniform(0, s)) for _ in self.mosaic_border)  # mosaic center x, y
+        img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s]
+
+        # Concat/clip labels
+        labels9 = np.concatenate(labels9, 0)
+        labels9[:, [1, 3]] -= xc
+        labels9[:, [2, 4]] -= yc
+        c = np.array([xc, yc])  # centers
+        segments9 = [x - c for x in segments9]
+
+        for x in (labels9[:, 1:], *segments9):
+            np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
+        # img9, labels9 = replicate(img9, labels9)  # replicate
+
+        # Augment
+        img9, labels9, segments9 = copy_paste(img9, labels9, segments9, p=self.hyp['copy_paste'])
+        img9, labels9 = random_perspective(img9,
+                                           labels9,
+                                           segments9,
+                                           degrees=self.hyp['degrees'],
+                                           translate=self.hyp['translate'],
+                                           scale=self.hyp['scale'],
+                                           shear=self.hyp['shear'],
+                                           perspective=self.hyp['perspective'],
+                                           border=self.mosaic_border)  # border to remove
+
+        return img9, labels9
+
+    @staticmethod
+    def collate_fn(batch):
+        im, label, path, shapes = zip(*batch)  # transposed
+        for i, lb in enumerate(label):
+            lb[:, 0] = i  # add target image index for build_targets()
+        return torch.stack(im, 0), torch.cat(label, 0), path, shapes
+
+    @staticmethod
+    def collate_fn4(batch):
+        im, label, path, shapes = zip(*batch)  # transposed
+        n = len(shapes) // 4
+        im4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]
+
+        ho = torch.tensor([[0.0, 0, 0, 1, 0, 0]])
+        wo = torch.tensor([[0.0, 0, 1, 0, 0, 0]])
+        s = torch.tensor([[1, 1, 0.5, 0.5, 0.5, 0.5]])  # scale
+        for i in range(n):  # zidane torch.zeros(16,3,720,1280)  # BCHW
+            i *= 4
+            if random.random() < 0.5:
+                im1 = F.interpolate(im[i].unsqueeze(0).float(), scale_factor=2.0, mode='bilinear',
+                                    align_corners=False)[0].type(im[i].type())
+                lb = label[i]
+            else:
+                im1 = torch.cat((torch.cat((im[i], im[i + 1]), 1), torch.cat((im[i + 2], im[i + 3]), 1)), 2)
+                lb = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s
+            im4.append(im1)
+            label4.append(lb)
+
+        for i, lb in enumerate(label4):
+            lb[:, 0] = i  # add target image index for build_targets()
+
+        return torch.stack(im4, 0), torch.cat(label4, 0), path4, shapes4
+
+
+# Ancillary functions --------------------------------------------------------------------------------------------------
+def flatten_recursive(path=DATASETS_DIR / 'coco128'):
+    # Flatten a recursive directory by bringing all files to top level
+    new_path = Path(f'{str(path)}_flat')
+    if os.path.exists(new_path):
+        shutil.rmtree(new_path)  # delete output folder
+    os.makedirs(new_path)  # make new output folder
+    for file in tqdm(glob.glob(f'{str(Path(path))}/**/*.*', recursive=True)):
+        shutil.copyfile(file, new_path / Path(file).name)
+
+
+def extract_boxes(path=DATASETS_DIR / 'coco128'):  # from utils.dataloaders import *; extract_boxes()
+    # Convert detection dataset into classification dataset, with one directory per class
+    path = Path(path)  # images dir
+    shutil.rmtree(path / 'classification') if (path / 'classification').is_dir() else None  # remove existing
+    files = list(path.rglob('*.*'))
+    n = len(files)  # number of files
+    for im_file in tqdm(files, total=n):
+        if im_file.suffix[1:] in IMG_FORMATS:
+            # image
+            im = cv2.imread(str(im_file))[..., ::-1]  # BGR to RGB
+            h, w = im.shape[:2]
+
+            # labels
+            lb_file = Path(img2label_paths([str(im_file)])[0])
+            if Path(lb_file).exists():
+                with open(lb_file) as f:
+                    lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32)  # labels
+
+                for j, x in enumerate(lb):
+                    c = int(x[0])  # class
+                    f = (path / 'classifier') / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg'  # new filename
+                    if not f.parent.is_dir():
+                        f.parent.mkdir(parents=True)
+
+                    b = x[1:] * [w, h, w, h]  # box
+                    # b[2:] = b[2:].max()  # rectangle to square
+                    b[2:] = b[2:] * 1.2 + 3  # pad
+                    b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)
+
+                    b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
+                    b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
+                    assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}'
+
+
+def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
+    """ Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
+    Usage: from utils.dataloaders import *; autosplit()
+    Arguments
+        path:            Path to images directory
+        weights:         Train, val, test weights (list, tuple)
+        annotated_only:  Only use images with an annotated txt file
+    """
+    path = Path(path)  # images dir
+    files = sorted(x for x in path.rglob('*.*') if x.suffix[1:].lower() in IMG_FORMATS)  # image files only
+    n = len(files)  # number of files
+    random.seed(0)  # for reproducibility
+    indices = random.choices([0, 1, 2], weights=weights, k=n)  # assign each image to a split
+
+    txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt']  # 3 txt files
+    for x in txt:
+        if (path.parent / x).exists():
+            (path.parent / x).unlink()  # remove existing
+
+    print(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only)
+    for i, img in tqdm(zip(indices, files), total=n):
+        if not annotated_only or Path(img2label_paths([str(img)])[0]).exists():  # check label
+            with open(path.parent / txt[i], 'a') as f:
+                f.write(f'./{img.relative_to(path.parent).as_posix()}' + '\n')  # add image to txt file
+
+
+def verify_image_label(args):
+    # Verify one image-label pair
+    im_file, lb_file, prefix = args
+    nm, nf, ne, nc, msg, segments = 0, 0, 0, 0, '', []  # number (missing, found, empty, corrupt), message, segments
+    try:
+        # verify images
+        im = Image.open(im_file)
+        im.verify()  # PIL verify
+        shape = exif_size(im)  # image size
+        assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels'
+        assert im.format.lower() in IMG_FORMATS, f'invalid image format {im.format}'
+        if im.format.lower() in ('jpg', 'jpeg'):
+            with open(im_file, 'rb') as f:
+                f.seek(-2, 2)
+                if f.read() != b'\xff\xd9':  # corrupt JPEG
+                    ImageOps.exif_transpose(Image.open(im_file)).save(im_file, 'JPEG', subsampling=0, quality=100)
+                    msg = f'{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved'
+
+        # verify labels
+        if os.path.isfile(lb_file):
+            nf = 1  # label found
+            with open(lb_file) as f:
+                lb = [x.split() for x in f.read().strip().splitlines() if len(x)]
+                if any(len(x) > 6 for x in lb):  # is segment
+                    classes = np.array([x[0] for x in lb], dtype=np.float32)
+                    segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in lb]  # (cls, xy1...)
+                    lb = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1)  # (cls, xywh)
+                lb = np.array(lb, dtype=np.float32)
+            nl = len(lb)
+            if nl:
+                assert lb.shape[1] == 5, f'labels require 5 columns, {lb.shape[1]} columns detected'
+                assert (lb >= 0).all(), f'negative label values {lb[lb < 0]}'
+                assert (lb[:, 1:] <= 1).all(), f'non-normalized or out of bounds coordinates {lb[:, 1:][lb[:, 1:] > 1]}'
+                _, i = np.unique(lb, axis=0, return_index=True)
+                if len(i) < nl:  # duplicate row check
+                    lb = lb[i]  # remove duplicates
+                    if segments:
+                        segments = [segments[x] for x in i]
+                    msg = f'{prefix}WARNING ⚠️ {im_file}: {nl - len(i)} duplicate labels removed'
+            else:
+                ne = 1  # label empty
+                lb = np.zeros((0, 5), dtype=np.float32)
+        else:
+            nm = 1  # label missing
+            lb = np.zeros((0, 5), dtype=np.float32)
+        return im_file, lb, shape, segments, nm, nf, ne, nc, msg
+    except Exception as e:
+        nc = 1
+        msg = f'{prefix}WARNING ⚠️ {im_file}: ignoring corrupt image/label: {e}'
+        return [None, None, None, None, nm, nf, ne, nc, msg]
+
+
+class HUBDatasetStats():
+    """ Class for generating HUB dataset JSON and `-hub` dataset directory
+
+    Arguments
+        path:           Path to data.yaml or data.zip (with data.yaml inside data.zip)
+        autodownload:   Attempt to download dataset if not found locally
+
+    Usage
+        from utils.dataloaders import HUBDatasetStats
+        stats = HUBDatasetStats('coco128.yaml', autodownload=True)  # usage 1
+        stats = HUBDatasetStats('path/to/coco128.zip')  # usage 2
+        stats.get_json(save=False)
+        stats.process_images()
+    """
+
+    def __init__(self, path='coco128.yaml', autodownload=False):
+        # Initialize class
+        zipped, data_dir, yaml_path = self._unzip(Path(path))
+        try:
+            with open(check_yaml(yaml_path), errors='ignore') as f:
+                data = yaml.safe_load(f)  # data dict
+                if zipped:
+                    data['path'] = data_dir
+        except Exception as e:
+            raise Exception('error/HUB/dataset_stats/yaml_load') from e
+
+        check_dataset(data, autodownload)  # download dataset if missing
+        self.hub_dir = Path(data['path'] + '-hub')
+        self.im_dir = self.hub_dir / 'images'
+        self.im_dir.mkdir(parents=True, exist_ok=True)  # makes /images
+        self.stats = {'nc': data['nc'], 'names': list(data['names'].values())}  # statistics dictionary
+        self.data = data
+
+    @staticmethod
+    def _find_yaml(dir):
+        # Return data.yaml file
+        files = list(dir.glob('*.yaml')) or list(dir.rglob('*.yaml'))  # try root level first and then recursive
+        assert files, f'No *.yaml file found in {dir}'
+        if len(files) > 1:
+            files = [f for f in files if f.stem == dir.stem]  # prefer *.yaml files that match dir name
+            assert files, f'Multiple *.yaml files found in {dir}, only 1 *.yaml file allowed'
+        assert len(files) == 1, f'Multiple *.yaml files found: {files}, only 1 *.yaml file allowed in {dir}'
+        return files[0]
+
+    def _unzip(self, path):
+        # Unzip data.zip
+        if not str(path).endswith('.zip'):  # path is data.yaml
+            return False, None, path
+        assert Path(path).is_file(), f'Error unzipping {path}, file not found'
+        unzip_file(path, path=path.parent)
+        dir = path.with_suffix('')  # dataset directory == zip name
+        assert dir.is_dir(), f'Error unzipping {path}, {dir} not found. path/to/abc.zip MUST unzip to path/to/abc/'
+        return True, str(dir), self._find_yaml(dir)  # zipped, data_dir, yaml_path
+
+    def _hub_ops(self, f, max_dim=1920):
+        # HUB ops for 1 image 'f': resize and save at reduced quality in /dataset-hub for web/app viewing
+        f_new = self.im_dir / Path(f).name  # dataset-hub image filename
+        try:  # use PIL
+            im = Image.open(f)
+            r = max_dim / max(im.height, im.width)  # ratio
+            if r < 1.0:  # image too large
+                im = im.resize((int(im.width * r), int(im.height * r)))
+            im.save(f_new, 'JPEG', quality=50, optimize=True)  # save
+        except Exception as e:  # use OpenCV
+            LOGGER.info(f'WARNING ⚠️ HUB ops PIL failure {f}: {e}')
+            im = cv2.imread(f)
+            im_height, im_width = im.shape[:2]
+            r = max_dim / max(im_height, im_width)  # ratio
+            if r < 1.0:  # image too large
+                im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_AREA)
+            cv2.imwrite(str(f_new), im)
+
+    def get_json(self, save=False, verbose=False):
+        # Return dataset JSON for Ultralytics HUB
+        def _round(labels):
+            # Update labels to integer class and 6 decimal place floats
+            return [[int(c), *(round(x, 4) for x in points)] for c, *points in labels]
+
+        for split in 'train', 'val', 'test':
+            if self.data.get(split) is None:
+                self.stats[split] = None  # i.e. no test set
+                continue
+            dataset = LoadImagesAndLabels(self.data[split])  # load dataset
+            x = np.array([
+                np.bincount(label[:, 0].astype(int), minlength=self.data['nc'])
+                for label in tqdm(dataset.labels, total=dataset.n, desc='Statistics')])  # shape(128x80)
+            self.stats[split] = {
+                'instance_stats': {
+                    'total': int(x.sum()),
+                    'per_class': x.sum(0).tolist()},
+                'image_stats': {
+                    'total': dataset.n,
+                    'unlabelled': int(np.all(x == 0, 1).sum()),
+                    'per_class': (x > 0).sum(0).tolist()},
+                'labels': [{
+                    str(Path(k).name): _round(v.tolist())} for k, v in zip(dataset.im_files, dataset.labels)]}
+
+        # Save, print and return
+        if save:
+            stats_path = self.hub_dir / 'stats.json'
+            print(f'Saving {stats_path.resolve()}...')
+            with open(stats_path, 'w') as f:
+                json.dump(self.stats, f)  # save stats.json
+        if verbose:
+            print(json.dumps(self.stats, indent=2, sort_keys=False))
+        return self.stats
+
+    def process_images(self):
+        # Compress images for Ultralytics HUB
+        for split in 'train', 'val', 'test':
+            if self.data.get(split) is None:
+                continue
+            dataset = LoadImagesAndLabels(self.data[split])  # load dataset
+            desc = f'{split} images'
+            for _ in tqdm(ThreadPool(NUM_THREADS).imap(self._hub_ops, dataset.im_files), total=dataset.n, desc=desc):
+                pass
+        print(f'Done. All images saved to {self.im_dir}')
+        return self.im_dir
+
+
+# Classification dataloaders -------------------------------------------------------------------------------------------
+class ClassificationDataset(torchvision.datasets.ImageFolder):
+    """
+    YOLOv5 Classification Dataset.
+    Arguments
+        root:  Dataset path
+        transform:  torchvision transforms, used by default
+        album_transform: Albumentations transforms, used if installed
+    """
+
+    def __init__(self, root, augment, imgsz, cache=False):
+        super().__init__(root=root)
+        self.torch_transforms = classify_transforms(imgsz)
+        self.album_transforms = classify_albumentations(augment, imgsz) if augment else None
+        self.cache_ram = cache is True or cache == 'ram'
+        self.cache_disk = cache == 'disk'
+        self.samples = [list(x) + [Path(x[0]).with_suffix('.npy'), None] for x in self.samples]  # file, index, npy, im
+
+    def __getitem__(self, i):
+        f, j, fn, im = self.samples[i]  # filename, index, filename.with_suffix('.npy'), image
+        if self.cache_ram and im is None:
+            im = self.samples[i][3] = cv2.imread(f)
+        elif self.cache_disk:
+            if not fn.exists():  # load npy
+                np.save(fn.as_posix(), cv2.imread(f))
+            im = np.load(fn)
+        else:  # read image
+            im = cv2.imread(f)  # BGR
+        if self.album_transforms:
+            sample = self.album_transforms(image=cv2.cvtColor(im, cv2.COLOR_BGR2RGB))['image']
+        else:
+            sample = self.torch_transforms(im)
+        return sample, j
+
+
+def create_classification_dataloader(path,
+                                     imgsz=224,
+                                     batch_size=16,
+                                     augment=True,
+                                     cache=False,
+                                     rank=-1,
+                                     workers=8,
+                                     shuffle=True):
+    # Returns Dataloader object to be used with YOLOv5 Classifier
+    with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
+        dataset = ClassificationDataset(root=path, imgsz=imgsz, augment=augment, cache=cache)
+    batch_size = min(batch_size, len(dataset))
+    nd = torch.cuda.device_count()
+    nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers])
+    sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
+    generator = torch.Generator()
+    generator.manual_seed(6148914691236517205 + RANK)
+    return InfiniteDataLoader(dataset,
+                              batch_size=batch_size,
+                              shuffle=shuffle and sampler is None,
+                              num_workers=nw,
+                              sampler=sampler,
+                              pin_memory=PIN_MEMORY,
+                              worker_init_fn=seed_worker,
+                              generator=generator)  # or DataLoader(persistent_workers=True)
diff --git a/utils/datasets.py b/utils/datasets.py
deleted file mode 100755
index 3fcdddd7c013..000000000000
--- a/utils/datasets.py
+++ /dev/null
@@ -1,1064 +0,0 @@
-# Dataset utils and dataloaders
-
-import glob
-import logging
-import math
-import os
-import random
-import shutil
-import time
-from itertools import repeat
-from multiprocessing.pool import ThreadPool
-from pathlib import Path
-from threading import Thread
-
-import cv2
-import numpy as np
-import torch
-import torch.nn.functional as F
-from PIL import Image, ExifTags
-from torch.utils.data import Dataset
-from tqdm import tqdm
-
-from utils.general import check_requirements, xyxy2xywh, xywh2xyxy, xywhn2xyxy, xyn2xy, segment2box, segments2boxes, \
-    resample_segments, clean_str
-from utils.torch_utils import torch_distributed_zero_first
-
-# Parameters
-help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
-img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo']  # acceptable image suffixes
-vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv']  # acceptable video suffixes
-logger = logging.getLogger(__name__)
-
-# Get orientation exif tag
-for orientation in ExifTags.TAGS.keys():
-    if ExifTags.TAGS[orientation] == 'Orientation':
-        break
-
-
-def get_hash(files):
-    # Returns a single hash value of a list of files
-    return sum(os.path.getsize(f) for f in files if os.path.isfile(f))
-
-
-def exif_size(img):
-    # Returns exif-corrected PIL size
-    s = img.size  # (width, height)
-    try:
-        rotation = dict(img._getexif().items())[orientation]
-        if rotation == 6:  # rotation 270
-            s = (s[1], s[0])
-        elif rotation == 8:  # rotation 90
-            s = (s[1], s[0])
-    except:
-        pass
-
-    return s
-
-
-def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False,
-                      rank=-1, world_size=1, workers=8, image_weights=False, quad=False, prefix=''):
-    # Make sure only the first process in DDP process the dataset first, and the following others can use the cache
-    with torch_distributed_zero_first(rank):
-        dataset = LoadImagesAndLabels(path, imgsz, batch_size,
-                                      augment=augment,  # augment images
-                                      hyp=hyp,  # augmentation hyperparameters
-                                      rect=rect,  # rectangular training
-                                      cache_images=cache,
-                                      single_cls=opt.single_cls,
-                                      stride=int(stride),
-                                      pad=pad,
-                                      image_weights=image_weights,
-                                      prefix=prefix)
-
-    batch_size = min(batch_size, len(dataset))
-    nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers])  # number of workers
-    sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None
-    loader = torch.utils.data.DataLoader if image_weights else InfiniteDataLoader
-    # Use torch.utils.data.DataLoader() if dataset.properties will update during training else InfiniteDataLoader()
-    dataloader = loader(dataset,
-                        batch_size=batch_size,
-                        num_workers=nw,
-                        sampler=sampler,
-                        pin_memory=True,
-                        collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn)
-    return dataloader, dataset
-
-
-class InfiniteDataLoader(torch.utils.data.dataloader.DataLoader):
-    """ Dataloader that reuses workers
-
-    Uses same syntax as vanilla DataLoader
-    """
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler))
-        self.iterator = super().__iter__()
-
-    def __len__(self):
-        return len(self.batch_sampler.sampler)
-
-    def __iter__(self):
-        for i in range(len(self)):
-            yield next(self.iterator)
-
-
-class _RepeatSampler(object):
-    """ Sampler that repeats forever
-
-    Args:
-        sampler (Sampler)
-    """
-
-    def __init__(self, sampler):
-        self.sampler = sampler
-
-    def __iter__(self):
-        while True:
-            yield from iter(self.sampler)
-
-
-class LoadImages:  # for inference
-    def __init__(self, path, img_size=640, stride=32):
-        p = str(Path(path).absolute())  # os-agnostic absolute path
-        if '*' in p:
-            files = sorted(glob.glob(p, recursive=True))  # glob
-        elif os.path.isdir(p):
-            files = sorted(glob.glob(os.path.join(p, '*.*')))  # dir
-        elif os.path.isfile(p):
-            files = [p]  # files
-        else:
-            raise Exception(f'ERROR: {p} does not exist')
-
-        images = [x for x in files if x.split('.')[-1].lower() in img_formats]
-        videos = [x for x in files if x.split('.')[-1].lower() in vid_formats]
-        ni, nv = len(images), len(videos)
-
-        self.img_size = img_size
-        self.stride = stride
-        self.files = images + videos
-        self.nf = ni + nv  # number of files
-        self.video_flag = [False] * ni + [True] * nv
-        self.mode = 'image'
-        if any(videos):
-            self.new_video(videos[0])  # new video
-        else:
-            self.cap = None
-        assert self.nf > 0, f'No images or videos found in {p}. ' \
-                            f'Supported formats are:\nimages: {img_formats}\nvideos: {vid_formats}'
-
-    def __iter__(self):
-        self.count = 0
-        return self
-
-    def __next__(self):
-        if self.count == self.nf:
-            raise StopIteration
-        path = self.files[self.count]
-
-        if self.video_flag[self.count]:
-            # Read video
-            self.mode = 'video'
-            ret_val, img0 = self.cap.read()
-            if not ret_val:
-                self.count += 1
-                self.cap.release()
-                if self.count == self.nf:  # last video
-                    raise StopIteration
-                else:
-                    path = self.files[self.count]
-                    self.new_video(path)
-                    ret_val, img0 = self.cap.read()
-
-            self.frame += 1
-            print(f'video {self.count + 1}/{self.nf} ({self.frame}/{self.nframes}) {path}: ', end='')
-
-        else:
-            # Read image
-            self.count += 1
-            img0 = cv2.imread(path)  # BGR
-            assert img0 is not None, 'Image Not Found ' + path
-            print(f'image {self.count}/{self.nf} {path}: ', end='')
-
-        # Padded resize
-        img = letterbox(img0, self.img_size, stride=self.stride)[0]
-
-        # Convert
-        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
-        img = np.ascontiguousarray(img)
-
-        return path, img, img0, self.cap
-
-    def new_video(self, path):
-        self.frame = 0
-        self.cap = cv2.VideoCapture(path)
-        self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
-
-    def __len__(self):
-        return self.nf  # number of files
-
-
-class LoadWebcam:  # for inference
-    def __init__(self, pipe='0', img_size=640, stride=32):
-        self.img_size = img_size
-        self.stride = stride
-
-        if pipe.isnumeric():
-            pipe = eval(pipe)  # local camera
-        # pipe = 'rtsp://192.168.1.64/1'  # IP camera
-        # pipe = 'rtsp://username:password@192.168.1.64/1'  # IP camera with login
-        # pipe = 'http://wmccpinetop.axiscam.net/mjpg/video.mjpg'  # IP golf camera
-
-        self.pipe = pipe
-        self.cap = cv2.VideoCapture(pipe)  # video capture object
-        self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3)  # set buffer size
-
-    def __iter__(self):
-        self.count = -1
-        return self
-
-    def __next__(self):
-        self.count += 1
-        if cv2.waitKey(1) == ord('q'):  # q to quit
-            self.cap.release()
-            cv2.destroyAllWindows()
-            raise StopIteration
-
-        # Read frame
-        if self.pipe == 0:  # local camera
-            ret_val, img0 = self.cap.read()
-            img0 = cv2.flip(img0, 1)  # flip left-right
-        else:  # IP camera
-            n = 0
-            while True:
-                n += 1
-                self.cap.grab()
-                if n % 30 == 0:  # skip frames
-                    ret_val, img0 = self.cap.retrieve()
-                    if ret_val:
-                        break
-
-        # Print
-        assert ret_val, f'Camera Error {self.pipe}'
-        img_path = 'webcam.jpg'
-        print(f'webcam {self.count}: ', end='')
-
-        # Padded resize
-        img = letterbox(img0, self.img_size, stride=self.stride)[0]
-
-        # Convert
-        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
-        img = np.ascontiguousarray(img)
-
-        return img_path, img, img0, None
-
-    def __len__(self):
-        return 0
-
-
-class LoadStreams:  # multiple IP or RTSP cameras
-    def __init__(self, sources='streams.txt', img_size=640, stride=32):
-        self.mode = 'stream'
-        self.img_size = img_size
-        self.stride = stride
-
-        if os.path.isfile(sources):
-            with open(sources, 'r') as f:
-                sources = [x.strip() for x in f.read().strip().splitlines() if len(x.strip())]
-        else:
-            sources = [sources]
-
-        n = len(sources)
-        self.imgs = [None] * n
-        self.sources = [clean_str(x) for x in sources]  # clean source names for later
-        for i, s in enumerate(sources):  # index, source
-            # Start thread to read frames from video stream
-            print(f'{i + 1}/{n}: {s}... ', end='')
-            if 'youtube.com/' in s or 'youtu.be/' in s:  # if source is YouTube video
-                check_requirements(('pafy', 'youtube_dl'))
-                import pafy
-                s = pafy.new(s).getbest(preftype="mp4").url  # YouTube URL
-            s = eval(s) if s.isnumeric() else s  # i.e. s = '0' local webcam
-            cap = cv2.VideoCapture(s)
-            assert cap.isOpened(), f'Failed to open {s}'
-            w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-            h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-            self.fps = cap.get(cv2.CAP_PROP_FPS) % 100
-
-            _, self.imgs[i] = cap.read()  # guarantee first frame
-            thread = Thread(target=self.update, args=([i, cap]), daemon=True)
-            print(f' success ({w}x{h} at {self.fps:.2f} FPS).')
-            thread.start()
-        print('')  # newline
-
-        # check for common shapes
-        s = np.stack([letterbox(x, self.img_size, stride=self.stride)[0].shape for x in self.imgs], 0)  # shapes
-        self.rect = np.unique(s, axis=0).shape[0] == 1  # rect inference if all shapes equal
-        if not self.rect:
-            print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')
-
-    def update(self, index, cap):
-        # Read next stream frame in a daemon thread
-        n = 0
-        while cap.isOpened():
-            n += 1
-            # _, self.imgs[index] = cap.read()
-            cap.grab()
-            if n == 4:  # read every 4th frame
-                success, im = cap.retrieve()
-                self.imgs[index] = im if success else self.imgs[index] * 0
-                n = 0
-            time.sleep(1 / self.fps)  # wait time
-
-    def __iter__(self):
-        self.count = -1
-        return self
-
-    def __next__(self):
-        self.count += 1
-        img0 = self.imgs.copy()
-        if cv2.waitKey(1) == ord('q'):  # q to quit
-            cv2.destroyAllWindows()
-            raise StopIteration
-
-        # Letterbox
-        img = [letterbox(x, self.img_size, auto=self.rect, stride=self.stride)[0] for x in img0]
-
-        # Stack
-        img = np.stack(img, 0)
-
-        # Convert
-        img = img[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB, to bsx3x416x416
-        img = np.ascontiguousarray(img)
-
-        return self.sources, img, img0, None
-
-    def __len__(self):
-        return 0  # 1E12 frames = 32 streams at 30 FPS for 30 years
-
-
-def img2label_paths(img_paths):
-    # Define label paths as a function of image paths
-    sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep  # /images/, /labels/ substrings
-    return ['txt'.join(x.replace(sa, sb, 1).rsplit(x.split('.')[-1], 1)) for x in img_paths]
-
-
-class LoadImagesAndLabels(Dataset):  # for training/testing
-    def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
-                 cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''):
-        self.img_size = img_size
-        self.augment = augment
-        self.hyp = hyp
-        self.image_weights = image_weights
-        self.rect = False if image_weights else rect
-        self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)
-        self.mosaic_border = [-img_size // 2, -img_size // 2]
-        self.stride = stride
-        self.path = path
-
-        try:
-            f = []  # image files
-            for p in path if isinstance(path, list) else [path]:
-                p = Path(p)  # os-agnostic
-                if p.is_dir():  # dir
-                    f += glob.glob(str(p / '**' / '*.*'), recursive=True)
-                    # f = list(p.rglob('**/*.*'))  # pathlib
-                elif p.is_file():  # file
-                    with open(p, 'r') as t:
-                        t = t.read().strip().splitlines()
-                        parent = str(p.parent) + os.sep
-                        f += [x.replace('./', parent) if x.startswith('./') else x for x in t]  # local to global path
-                        # f += [p.parent / x.lstrip(os.sep) for x in t]  # local to global path (pathlib)
-                else:
-                    raise Exception(f'{prefix}{p} does not exist')
-            self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats])
-            # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in img_formats])  # pathlib
-            assert self.img_files, f'{prefix}No images found'
-        except Exception as e:
-            raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {help_url}')
-
-        # Check cache
-        self.label_files = img2label_paths(self.img_files)  # labels
-        cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')  # cached labels
-        if cache_path.is_file():
-            cache, exists = torch.load(cache_path), True  # load
-            if cache['hash'] != get_hash(self.label_files + self.img_files) or 'version' not in cache:  # changed
-                cache, exists = self.cache_labels(cache_path, prefix), False  # re-cache
-        else:
-            cache, exists = self.cache_labels(cache_path, prefix), False  # cache
-
-        # Display cache
-        nf, nm, ne, nc, n = cache.pop('results')  # found, missing, empty, corrupted, total
-        if exists:
-            d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted"
-            tqdm(None, desc=prefix + d, total=n, initial=n)  # display cache results
-        assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. See {help_url}'
-
-        # Read cache
-        cache.pop('hash')  # remove hash
-        cache.pop('version')  # remove version
-        labels, shapes, self.segments = zip(*cache.values())
-        self.labels = list(labels)
-        self.shapes = np.array(shapes, dtype=np.float64)
-        self.img_files = list(cache.keys())  # update
-        self.label_files = img2label_paths(cache.keys())  # update
-        if single_cls:
-            for x in self.labels:
-                x[:, 0] = 0
-
-        n = len(shapes)  # number of images
-        bi = np.floor(np.arange(n) / batch_size).astype(np.int)  # batch index
-        nb = bi[-1] + 1  # number of batches
-        self.batch = bi  # batch index of image
-        self.n = n
-        self.indices = range(n)
-
-        # Rectangular Training
-        if self.rect:
-            # Sort by aspect ratio
-            s = self.shapes  # wh
-            ar = s[:, 1] / s[:, 0]  # aspect ratio
-            irect = ar.argsort()
-            self.img_files = [self.img_files[i] for i in irect]
-            self.label_files = [self.label_files[i] for i in irect]
-            self.labels = [self.labels[i] for i in irect]
-            self.shapes = s[irect]  # wh
-            ar = ar[irect]
-
-            # Set training image shapes
-            shapes = [[1, 1]] * nb
-            for i in range(nb):
-                ari = ar[bi == i]
-                mini, maxi = ari.min(), ari.max()
-                if maxi < 1:
-                    shapes[i] = [maxi, 1]
-                elif mini > 1:
-                    shapes[i] = [1, 1 / mini]
-
-            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride
-
-        # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
-        self.imgs = [None] * n
-        if cache_images:
-            gb = 0  # Gigabytes of cached images
-            self.img_hw0, self.img_hw = [None] * n, [None] * n
-            results = ThreadPool(8).imap(lambda x: load_image(*x), zip(repeat(self), range(n)))  # 8 threads
-            pbar = tqdm(enumerate(results), total=n)
-            for i, x in pbar:
-                self.imgs[i], self.img_hw0[i], self.img_hw[i] = x  # img, hw_original, hw_resized = load_image(self, i)
-                gb += self.imgs[i].nbytes
-                pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB)'
-            pbar.close()
-
-    def cache_labels(self, path=Path('./labels.cache'), prefix=''):
-        # Cache dataset labels, check images and read shapes
-        x = {}  # dict
-        nm, nf, ne, nc = 0, 0, 0, 0  # number missing, found, empty, duplicate
-        pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files))
-        for i, (im_file, lb_file) in enumerate(pbar):
-            try:
-                # verify images
-                im = Image.open(im_file)
-                im.verify()  # PIL verify
-                shape = exif_size(im)  # image size
-                segments = []  # instance segments
-                assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels'
-                assert im.format.lower() in img_formats, f'invalid image format {im.format}'
-
-                # verify labels
-                if os.path.isfile(lb_file):
-                    nf += 1  # label found
-                    with open(lb_file, 'r') as f:
-                        l = [x.split() for x in f.read().strip().splitlines()]
-                        if any([len(x) > 8 for x in l]):  # is segment
-                            classes = np.array([x[0] for x in l], dtype=np.float32)
-                            segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in l]  # (cls, xy1...)
-                            l = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1)  # (cls, xywh)
-                        l = np.array(l, dtype=np.float32)
-                    if len(l):
-                        assert l.shape[1] == 5, 'labels require 5 columns each'
-                        assert (l >= 0).all(), 'negative labels'
-                        assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
-                        assert np.unique(l, axis=0).shape[0] == l.shape[0], 'duplicate labels'
-                    else:
-                        ne += 1  # label empty
-                        l = np.zeros((0, 5), dtype=np.float32)
-                else:
-                    nm += 1  # label missing
-                    l = np.zeros((0, 5), dtype=np.float32)
-                x[im_file] = [l, shape, segments]
-            except Exception as e:
-                nc += 1
-                print(f'{prefix}WARNING: Ignoring corrupted image and/or label {im_file}: {e}')
-
-            pbar.desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels... " \
-                        f"{nf} found, {nm} missing, {ne} empty, {nc} corrupted"
-        pbar.close()
-
-        if nf == 0:
-            print(f'{prefix}WARNING: No labels found in {path}. See {help_url}')
-
-        x['hash'] = get_hash(self.label_files + self.img_files)
-        x['results'] = nf, nm, ne, nc, i + 1
-        x['version'] = 0.1  # cache version
-        torch.save(x, path)  # save for next time
-        logging.info(f'{prefix}New cache created: {path}')
-        return x
-
-    def __len__(self):
-        return len(self.img_files)
-
-    # def __iter__(self):
-    #     self.count = -1
-    #     print('ran dataset iter')
-    #     #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
-    #     return self
-
-    def __getitem__(self, index):
-        index = self.indices[index]  # linear, shuffled, or image_weights
-
-        hyp = self.hyp
-        mosaic = self.mosaic and random.random() < hyp['mosaic']
-        if mosaic:
-            # Load mosaic
-            img, labels = load_mosaic(self, index)
-            shapes = None
-
-            # MixUp https://arxiv.org/pdf/1710.09412.pdf
-            if random.random() < hyp['mixup']:
-                img2, labels2 = load_mosaic(self, random.randint(0, self.n - 1))
-                r = np.random.beta(8.0, 8.0)  # mixup ratio, alpha=beta=8.0
-                img = (img * r + img2 * (1 - r)).astype(np.uint8)
-                labels = np.concatenate((labels, labels2), 0)
-
-        else:
-            # Load image
-            img, (h0, w0), (h, w) = load_image(self, index)
-
-            # Letterbox
-            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
-            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
-            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling
-
-            labels = self.labels[index].copy()
-            if labels.size:  # normalized xywh to pixel xyxy format
-                labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])
-
-        if self.augment:
-            # Augment imagespace
-            if not mosaic:
-                img, labels = random_perspective(img, labels,
-                                                 degrees=hyp['degrees'],
-                                                 translate=hyp['translate'],
-                                                 scale=hyp['scale'],
-                                                 shear=hyp['shear'],
-                                                 perspective=hyp['perspective'])
-
-            # Augment colorspace
-            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])
-
-            # Apply cutouts
-            # if random.random() < 0.9:
-            #     labels = cutout(img, labels)
-
-        nL = len(labels)  # number of labels
-        if nL:
-            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])  # convert xyxy to xywh
-            labels[:, [2, 4]] /= img.shape[0]  # normalized height 0-1
-            labels[:, [1, 3]] /= img.shape[1]  # normalized width 0-1
-
-        if self.augment:
-            # flip up-down
-            if random.random() < hyp['flipud']:
-                img = np.flipud(img)
-                if nL:
-                    labels[:, 2] = 1 - labels[:, 2]
-
-            # flip left-right
-            if random.random() < hyp['fliplr']:
-                img = np.fliplr(img)
-                if nL:
-                    labels[:, 1] = 1 - labels[:, 1]
-
-        labels_out = torch.zeros((nL, 6))
-        if nL:
-            labels_out[:, 1:] = torch.from_numpy(labels)
-
-        # Convert
-        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
-        img = np.ascontiguousarray(img)
-
-        return torch.from_numpy(img), labels_out, self.img_files[index], shapes
-
-    @staticmethod
-    def collate_fn(batch):
-        img, label, path, shapes = zip(*batch)  # transposed
-        for i, l in enumerate(label):
-            l[:, 0] = i  # add target image index for build_targets()
-        return torch.stack(img, 0), torch.cat(label, 0), path, shapes
-
-    @staticmethod
-    def collate_fn4(batch):
-        img, label, path, shapes = zip(*batch)  # transposed
-        n = len(shapes) // 4
-        img4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]
-
-        ho = torch.tensor([[0., 0, 0, 1, 0, 0]])
-        wo = torch.tensor([[0., 0, 1, 0, 0, 0]])
-        s = torch.tensor([[1, 1, .5, .5, .5, .5]])  # scale
-        for i in range(n):  # zidane torch.zeros(16,3,720,1280)  # BCHW
-            i *= 4
-            if random.random() < 0.5:
-                im = F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2., mode='bilinear', align_corners=False)[
-                    0].type(img[i].type())
-                l = label[i]
-            else:
-                im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1)), 2)
-                l = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s
-            img4.append(im)
-            label4.append(l)
-
-        for i, l in enumerate(label4):
-            l[:, 0] = i  # add target image index for build_targets()
-
-        return torch.stack(img4, 0), torch.cat(label4, 0), path4, shapes4
-
-
-# Ancillary functions --------------------------------------------------------------------------------------------------
-def load_image(self, index):
-    # loads 1 image from dataset, returns img, original hw, resized hw
-    img = self.imgs[index]
-    if img is None:  # not cached
-        path = self.img_files[index]
-        img = cv2.imread(path)  # BGR
-        assert img is not None, 'Image Not Found ' + path
-        h0, w0 = img.shape[:2]  # orig hw
-        r = self.img_size / max(h0, w0)  # ratio
-        if r != 1:  # if sizes are not equal
-            img = cv2.resize(img, (int(w0 * r), int(h0 * r)),
-                             interpolation=cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR)
-        return img, (h0, w0), img.shape[:2]  # img, hw_original, hw_resized
-    else:
-        return self.imgs[index], self.img_hw0[index], self.img_hw[index]  # img, hw_original, hw_resized
-
-
-def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
-    r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
-    hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
-    dtype = img.dtype  # uint8
-
-    x = np.arange(0, 256, dtype=np.int16)
-    lut_hue = ((x * r[0]) % 180).astype(dtype)
-    lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
-    lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
-
-    img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
-    cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)  # no return needed
-
-
-def hist_equalize(img, clahe=True, bgr=False):
-    # Equalize histogram on BGR image 'img' with img.shape(n,m,3) and range 0-255
-    yuv = cv2.cvtColor(img, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV)
-    if clahe:
-        c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
-        yuv[:, :, 0] = c.apply(yuv[:, :, 0])
-    else:
-        yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0])  # equalize Y channel histogram
-    return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB)  # convert YUV image to RGB
-
-
-def load_mosaic(self, index):
-    # loads images in a 4-mosaic
-
-    labels4, segments4 = [], []
-    s = self.img_size
-    yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border]  # mosaic center x, y
-    indices = [index] + random.choices(self.indices, k=3)  # 3 additional image indices
-    for i, index in enumerate(indices):
-        # Load image
-        img, _, (h, w) = load_image(self, index)
-
-        # place img in img4
-        if i == 0:  # top left
-            img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
-            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
-            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
-        elif i == 1:  # top right
-            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
-            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
-        elif i == 2:  # bottom left
-            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
-            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
-        elif i == 3:  # bottom right
-            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
-            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
-
-        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
-        padw = x1a - x1b
-        padh = y1a - y1b
-
-        # Labels
-        labels, segments = self.labels[index].copy(), self.segments[index].copy()
-        if labels.size:
-            labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh)  # normalized xywh to pixel xyxy format
-            segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
-        labels4.append(labels)
-        segments4.extend(segments)
-
-    # Concat/clip labels
-    labels4 = np.concatenate(labels4, 0)
-    for x in (labels4[:, 1:], *segments4):
-        np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
-    # img4, labels4 = replicate(img4, labels4)  # replicate
-
-    # Augment
-    img4, labels4 = random_perspective(img4, labels4, segments4,
-                                       degrees=self.hyp['degrees'],
-                                       translate=self.hyp['translate'],
-                                       scale=self.hyp['scale'],
-                                       shear=self.hyp['shear'],
-                                       perspective=self.hyp['perspective'],
-                                       border=self.mosaic_border)  # border to remove
-
-    return img4, labels4
-
-
-def load_mosaic9(self, index):
-    # loads images in a 9-mosaic
-
-    labels9, segments9 = [], []
-    s = self.img_size
-    indices = [index] + random.choices(self.indices, k=8)  # 8 additional image indices
-    for i, index in enumerate(indices):
-        # Load image
-        img, _, (h, w) = load_image(self, index)
-
-        # place img in img9
-        if i == 0:  # center
-            img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
-            h0, w0 = h, w
-            c = s, s, s + w, s + h  # xmin, ymin, xmax, ymax (base) coordinates
-        elif i == 1:  # top
-            c = s, s - h, s + w, s
-        elif i == 2:  # top right
-            c = s + wp, s - h, s + wp + w, s
-        elif i == 3:  # right
-            c = s + w0, s, s + w0 + w, s + h
-        elif i == 4:  # bottom right
-            c = s + w0, s + hp, s + w0 + w, s + hp + h
-        elif i == 5:  # bottom
-            c = s + w0 - w, s + h0, s + w0, s + h0 + h
-        elif i == 6:  # bottom left
-            c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h
-        elif i == 7:  # left
-            c = s - w, s + h0 - h, s, s + h0
-        elif i == 8:  # top left
-            c = s - w, s + h0 - hp - h, s, s + h0 - hp
-
-        padx, pady = c[:2]
-        x1, y1, x2, y2 = [max(x, 0) for x in c]  # allocate coords
-
-        # Labels
-        labels, segments = self.labels[index].copy(), self.segments[index].copy()
-        if labels.size:
-            labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady)  # normalized xywh to pixel xyxy format
-            segments = [xyn2xy(x, w, h, padx, pady) for x in segments]
-        labels9.append(labels)
-        segments9.extend(segments)
-
-        # Image
-        img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:]  # img9[ymin:ymax, xmin:xmax]
-        hp, wp = h, w  # height, width previous
-
-    # Offset
-    yc, xc = [int(random.uniform(0, s)) for _ in self.mosaic_border]  # mosaic center x, y
-    img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s]
-
-    # Concat/clip labels
-    labels9 = np.concatenate(labels9, 0)
-    labels9[:, [1, 3]] -= xc
-    labels9[:, [2, 4]] -= yc
-    c = np.array([xc, yc])  # centers
-    segments9 = [x - c for x in segments9]
-
-    for x in (labels9[:, 1:], *segments9):
-        np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
-    # img9, labels9 = replicate(img9, labels9)  # replicate
-
-    # Augment
-    img9, labels9 = random_perspective(img9, labels9, segments9,
-                                       degrees=self.hyp['degrees'],
-                                       translate=self.hyp['translate'],
-                                       scale=self.hyp['scale'],
-                                       shear=self.hyp['shear'],
-                                       perspective=self.hyp['perspective'],
-                                       border=self.mosaic_border)  # border to remove
-
-    return img9, labels9
-
-
-def replicate(img, labels):
-    # Replicate labels
-    h, w = img.shape[:2]
-    boxes = labels[:, 1:].astype(int)
-    x1, y1, x2, y2 = boxes.T
-    s = ((x2 - x1) + (y2 - y1)) / 2  # side length (pixels)
-    for i in s.argsort()[:round(s.size * 0.5)]:  # smallest indices
-        x1b, y1b, x2b, y2b = boxes[i]
-        bh, bw = y2b - y1b, x2b - x1b
-        yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw))  # offset x, y
-        x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
-        img[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
-        labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)
-
-    return img, labels
-
-
-def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
-    # Resize and pad image while meeting stride-multiple constraints
-    shape = img.shape[:2]  # current shape [height, width]
-    if isinstance(new_shape, int):
-        new_shape = (new_shape, new_shape)
-
-    # Scale ratio (new / old)
-    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
-    if not scaleup:  # only scale down, do not scale up (for better test mAP)
-        r = min(r, 1.0)
-
-    # Compute padding
-    ratio = r, r  # width, height ratios
-    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
-    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
-    if auto:  # minimum rectangle
-        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
-    elif scaleFill:  # stretch
-        dw, dh = 0.0, 0.0
-        new_unpad = (new_shape[1], new_shape[0])
-        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios
-
-    dw /= 2  # divide padding into 2 sides
-    dh /= 2
-
-    if shape[::-1] != new_unpad:  # resize
-        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
-    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
-    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
-    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
-    return img, ratio, (dw, dh)
-
-
-def random_perspective(img, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
-                       border=(0, 0)):
-    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
-    # targets = [cls, xyxy]
-
-    height = img.shape[0] + border[0] * 2  # shape(h,w,c)
-    width = img.shape[1] + border[1] * 2
-
-    # Center
-    C = np.eye(3)
-    C[0, 2] = -img.shape[1] / 2  # x translation (pixels)
-    C[1, 2] = -img.shape[0] / 2  # y translation (pixels)
-
-    # Perspective
-    P = np.eye(3)
-    P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
-    P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)
-
-    # Rotation and Scale
-    R = np.eye(3)
-    a = random.uniform(-degrees, degrees)
-    # a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
-    s = random.uniform(1 - scale, 1 + scale)
-    # s = 2 ** random.uniform(-scale, scale)
-    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
-
-    # Shear
-    S = np.eye(3)
-    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
-    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)
-
-    # Translation
-    T = np.eye(3)
-    T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x translation (pixels)
-    T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y translation (pixels)
-
-    # Combined rotation matrix
-    M = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT
-    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
-        if perspective:
-            img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114))
-        else:  # affine
-            img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
-
-    # Visualize
-    # import matplotlib.pyplot as plt
-    # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
-    # ax[0].imshow(img[:, :, ::-1])  # base
-    # ax[1].imshow(img2[:, :, ::-1])  # warped
-
-    # Transform label coordinates
-    n = len(targets)
-    if n:
-        use_segments = any(x.any() for x in segments)
-        new = np.zeros((n, 4))
-        if use_segments:  # warp segments
-            segments = resample_segments(segments)  # upsample
-            for i, segment in enumerate(segments):
-                xy = np.ones((len(segment), 3))
-                xy[:, :2] = segment
-                xy = xy @ M.T  # transform
-                xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]  # perspective rescale or affine
-
-                # clip
-                new[i] = segment2box(xy, width, height)
-
-        else:  # warp boxes
-            xy = np.ones((n * 4, 3))
-            xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
-            xy = xy @ M.T  # transform
-            xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8)  # perspective rescale or affine
-
-            # create new boxes
-            x = xy[:, [0, 2, 4, 6]]
-            y = xy[:, [1, 3, 5, 7]]
-            new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
-
-            # clip
-            new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
-            new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)
-
-        # filter candidates
-        i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
-        targets = targets[i]
-        targets[:, 1:5] = new[i]
-
-    return img, targets
-
-
-def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16):  # box1(4,n), box2(4,n)
-    # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
-    w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
-    w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
-    ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps))  # aspect ratio
-    return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr)  # candidates
-
-
-def cutout(image, labels):
-    # Applies image cutout augmentation https://arxiv.org/abs/1708.04552
-    h, w = image.shape[:2]
-
-    def bbox_ioa(box1, box2):
-        # Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. boxes are x1y1x2y2
-        box2 = box2.transpose()
-
-        # Get the coordinates of bounding boxes
-        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
-        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
-
-        # Intersection area
-        inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
-                     (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)
-
-        # box2 area
-        box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16
-
-        # Intersection over box2 area
-        return inter_area / box2_area
-
-    # create random masks
-    scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16  # image size fraction
-    for s in scales:
-        mask_h = random.randint(1, int(h * s))
-        mask_w = random.randint(1, int(w * s))
-
-        # box
-        xmin = max(0, random.randint(0, w) - mask_w // 2)
-        ymin = max(0, random.randint(0, h) - mask_h // 2)
-        xmax = min(w, xmin + mask_w)
-        ymax = min(h, ymin + mask_h)
-
-        # apply random color mask
-        image[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]
-
-        # return unobscured labels
-        if len(labels) and s > 0.03:
-            box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
-            ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
-            labels = labels[ioa < 0.60]  # remove >60% obscured labels
-
-    return labels
-
-
-def create_folder(path='./new'):
-    # Create folder
-    if os.path.exists(path):
-        shutil.rmtree(path)  # delete output folder
-    os.makedirs(path)  # make new output folder
-
-
-def flatten_recursive(path='../coco128'):
-    # Flatten a recursive directory by bringing all files to top level
-    new_path = Path(path + '_flat')
-    create_folder(new_path)
-    for file in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)):
-        shutil.copyfile(file, new_path / Path(file).name)
-
-
-def extract_boxes(path='../coco128/'):  # from utils.datasets import *; extract_boxes('../coco128')
-    # Convert detection dataset into classification dataset, with one directory per class
-
-    path = Path(path)  # images dir
-    shutil.rmtree(path / 'classifier') if (path / 'classifier').is_dir() else None  # remove existing
-    files = list(path.rglob('*.*'))
-    n = len(files)  # number of files
-    for im_file in tqdm(files, total=n):
-        if im_file.suffix[1:] in img_formats:
-            # image
-            im = cv2.imread(str(im_file))[..., ::-1]  # BGR to RGB
-            h, w = im.shape[:2]
-
-            # labels
-            lb_file = Path(img2label_paths([str(im_file)])[0])
-            if Path(lb_file).exists():
-                with open(lb_file, 'r') as f:
-                    lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32)  # labels
-
-                for j, x in enumerate(lb):
-                    c = int(x[0])  # class
-                    f = (path / 'classifier') / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg'  # new filename
-                    if not f.parent.is_dir():
-                        f.parent.mkdir(parents=True)
-
-                    b = x[1:] * [w, h, w, h]  # box
-                    # b[2:] = b[2:].max()  # rectangle to square
-                    b[2:] = b[2:] * 1.2 + 3  # pad
-                    b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)
-
-                    b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
-                    b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
-                    assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}'
-
-
-def autosplit(path='../coco128', weights=(0.9, 0.1, 0.0), annotated_only=False):
-    """ Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
-    Usage: from utils.datasets import *; autosplit('../coco128')
-    Arguments
-        path:           Path to images directory
-        weights:        Train, val, test weights (list)
-        annotated_only: Only use images with an annotated txt file
-    """
-    path = Path(path)  # images dir
-    files = sum([list(path.rglob(f"*.{img_ext}")) for img_ext in img_formats], [])  # image files only
-    n = len(files)  # number of files
-    indices = random.choices([0, 1, 2], weights=weights, k=n)  # assign each image to a split
-
-    txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt']  # 3 txt files
-    [(path / x).unlink() for x in txt if (path / x).exists()]  # remove existing
-
-    print(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only)
-    for i, img in tqdm(zip(indices, files), total=n):
-        if not annotated_only or Path(img2label_paths([str(img)])[0]).exists():  # check label
-            with open(path / txt[i], 'a') as f:
-                f.write(str(img) + '\n')  # add image to txt file
diff --git a/utils/docker/Dockerfile b/utils/docker/Dockerfile
new file mode 100644
index 000000000000..b5d2af9fb08e
--- /dev/null
+++ b/utils/docker/Dockerfile
@@ -0,0 +1,75 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# Builds ultralytics/yolov5:latest image on DockerHub https://hub.docker.com/r/ultralytics/yolov5
+# Image is CUDA-optimized for YOLOv5 single/multi-GPU training and inference
+
+# Start FROM NVIDIA PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch
+# FROM docker.io/pytorch/pytorch:latest
+FROM pytorch/pytorch:latest
+
+# Downloads to user config dir
+ADD https://ultralytics.com/assets/Arial.ttf https://ultralytics.com/assets/Arial.Unicode.ttf /root/.config/Ultralytics/
+
+# Install linux packages
+ENV DEBIAN_FRONTEND noninteractive
+RUN apt update
+RUN TZ=Etc/UTC apt install -y tzdata
+RUN apt install --no-install-recommends -y gcc git zip curl htop libgl1-mesa-glx libglib2.0-0 libpython3-dev gnupg
+# RUN alias python=python3
+
+# Security updates
+# https://security.snyk.io/vuln/SNYK-UBUNTU1804-OPENSSL-3314796
+RUN apt upgrade --no-install-recommends -y openssl
+
+# Create working directory
+RUN rm -rf /usr/src/app && mkdir -p /usr/src/app
+WORKDIR /usr/src/app
+
+# Copy contents
+# COPY . /usr/src/app  (issues as not a .git directory)
+RUN git clone https://github.com/ultralytics/yolov5 /usr/src/app
+
+# Install pip packages
+COPY requirements.txt .
+RUN python3 -m pip install --upgrade pip wheel
+RUN pip install --no-cache -r requirements.txt albumentations comet gsutil notebook \
+    coremltools onnx onnx-simplifier onnxruntime 'openvino-dev>=2022.3'
+    # tensorflow tensorflowjs \
+
+# Set environment variables
+ENV OMP_NUM_THREADS=1
+
+# Cleanup
+ENV DEBIAN_FRONTEND teletype
+
+
+# Usage Examples -------------------------------------------------------------------------------------------------------
+
+# Build and Push
+# t=ultralytics/yolov5:latest && sudo docker build -f utils/docker/Dockerfile -t $t . && sudo docker push $t
+
+# Pull and Run
+# t=ultralytics/yolov5:latest && sudo docker pull $t && sudo docker run -it --ipc=host --gpus all $t
+
+# Pull and Run with local directory access
+# t=ultralytics/yolov5:latest && sudo docker pull $t && sudo docker run -it --ipc=host --gpus all -v "$(pwd)"/datasets:/usr/src/datasets $t
+
+# Kill all
+# sudo docker kill $(sudo docker ps -q)
+
+# Kill all image-based
+# sudo docker kill $(sudo docker ps -qa --filter ancestor=ultralytics/yolov5:latest)
+
+# DockerHub tag update
+# t=ultralytics/yolov5:latest tnew=ultralytics/yolov5:v6.2 && sudo docker pull $t && sudo docker tag $t $tnew && sudo docker push $tnew
+
+# Clean up
+# sudo docker system prune -a --volumes
+
+# Update Ubuntu drivers
+# https://www.maketecheasier.com/install-nvidia-drivers-ubuntu/
+
+# DDP test
+# python -m torch.distributed.run --nproc_per_node 2 --master_port 1 train.py --epochs 3
+
+# GCP VM from Image
+# docker.io/ultralytics/yolov5:latest
diff --git a/utils/docker/Dockerfile-arm64 b/utils/docker/Dockerfile-arm64
new file mode 100644
index 000000000000..7023c6a4bb1f
--- /dev/null
+++ b/utils/docker/Dockerfile-arm64
@@ -0,0 +1,41 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# Builds ultralytics/yolov5:latest-arm64 image on DockerHub https://hub.docker.com/r/ultralytics/yolov5
+# Image is aarch64-compatible for Apple M1 and other ARM architectures i.e. Jetson Nano and Raspberry Pi
+
+# Start FROM Ubuntu image https://hub.docker.com/_/ubuntu
+FROM arm64v8/ubuntu:rolling
+
+# Downloads to user config dir
+ADD https://ultralytics.com/assets/Arial.ttf https://ultralytics.com/assets/Arial.Unicode.ttf /root/.config/Ultralytics/
+
+# Install linux packages
+ENV DEBIAN_FRONTEND noninteractive
+RUN apt update
+RUN TZ=Etc/UTC apt install -y tzdata
+RUN apt install --no-install-recommends -y python3-pip git zip curl htop gcc libgl1-mesa-glx libglib2.0-0 libpython3-dev
+# RUN alias python=python3
+
+# Install pip packages
+COPY requirements.txt .
+RUN python3 -m pip install --upgrade pip wheel
+RUN pip install --no-cache -r requirements.txt albumentations gsutil notebook \
+    coremltools onnx onnxruntime
+    # tensorflow-aarch64 tensorflowjs \
+
+# Create working directory
+RUN mkdir -p /usr/src/app
+WORKDIR /usr/src/app
+
+# Copy contents
+# COPY . /usr/src/app  (issues as not a .git directory)
+RUN git clone https://github.com/ultralytics/yolov5 /usr/src/app
+ENV DEBIAN_FRONTEND teletype
+
+
+# Usage Examples -------------------------------------------------------------------------------------------------------
+
+# Build and Push
+# t=ultralytics/yolov5:latest-arm64 && sudo docker build --platform linux/arm64 -f utils/docker/Dockerfile-arm64 -t $t . && sudo docker push $t
+
+# Pull and Run
+# t=ultralytics/yolov5:latest-arm64 && sudo docker pull $t && sudo docker run -it --ipc=host -v "$(pwd)"/datasets:/usr/src/datasets $t
diff --git a/utils/docker/Dockerfile-cpu b/utils/docker/Dockerfile-cpu
new file mode 100644
index 000000000000..06bad9a3790d
--- /dev/null
+++ b/utils/docker/Dockerfile-cpu
@@ -0,0 +1,42 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# Builds ultralytics/yolov5:latest-cpu image on DockerHub https://hub.docker.com/r/ultralytics/yolov5
+# Image is CPU-optimized for ONNX, OpenVINO and PyTorch YOLOv5 deployments
+
+# Start FROM Ubuntu image https://hub.docker.com/_/ubuntu
+FROM ubuntu:rolling
+
+# Downloads to user config dir
+ADD https://ultralytics.com/assets/Arial.ttf https://ultralytics.com/assets/Arial.Unicode.ttf /root/.config/Ultralytics/
+
+# Install linux packages
+ENV DEBIAN_FRONTEND noninteractive
+RUN apt update
+RUN TZ=Etc/UTC apt install -y tzdata
+RUN apt install --no-install-recommends -y python3-pip git zip curl htop libgl1-mesa-glx libglib2.0-0 libpython3-dev gnupg
+# RUN alias python=python3
+
+# Install pip packages
+COPY requirements.txt .
+RUN python3 -m pip install --upgrade pip wheel
+RUN pip install --no-cache -r requirements.txt albumentations gsutil notebook \
+    coremltools onnx onnx-simplifier onnxruntime 'openvino-dev>=2022.3' \
+    # tensorflow tensorflowjs \
+    --extra-index-url https://download.pytorch.org/whl/cpu
+
+# Create working directory
+RUN mkdir -p /usr/src/app
+WORKDIR /usr/src/app
+
+# Copy contents
+# COPY . /usr/src/app  (issues as not a .git directory)
+RUN git clone https://github.com/ultralytics/yolov5 /usr/src/app
+ENV DEBIAN_FRONTEND teletype
+
+
+# Usage Examples -------------------------------------------------------------------------------------------------------
+
+# Build and Push
+# t=ultralytics/yolov5:latest-cpu && sudo docker build -f utils/docker/Dockerfile-cpu -t $t . && sudo docker push $t
+
+# Pull and Run
+# t=ultralytics/yolov5:latest-cpu && sudo docker pull $t && sudo docker run -it --ipc=host -v "$(pwd)"/datasets:/usr/src/datasets $t
diff --git a/utils/downloads.py b/utils/downloads.py
new file mode 100644
index 000000000000..88f523742b5b
--- /dev/null
+++ b/utils/downloads.py
@@ -0,0 +1,128 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Download utils
+"""
+
+import logging
+import os
+import subprocess
+import urllib
+from pathlib import Path
+
+import requests
+import torch
+
+
+def is_url(url, check=True):
+    # Check if string is URL and check if URL exists
+    try:
+        url = str(url)
+        result = urllib.parse.urlparse(url)
+        assert all([result.scheme, result.netloc])  # check if is url
+        return (urllib.request.urlopen(url).getcode() == 200) if check else True  # check if exists online
+    except (AssertionError, urllib.request.HTTPError):
+        return False
+
+
+def gsutil_getsize(url=''):
+    # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du
+    output = subprocess.check_output(['gsutil', 'du', url], shell=True, encoding='utf-8')
+    if output:
+        return int(output.split()[0])
+    return 0
+
+
+def url_getsize(url='https://ultralytics.com/images/bus.jpg'):
+    # Return downloadable file size in bytes
+    response = requests.head(url, allow_redirects=True)
+    return int(response.headers.get('content-length', -1))
+
+
+def curl_download(url, filename, *, silent: bool = False) -> bool:
+    """
+    Download a file from a url to a filename using curl.
+    """
+    silent_option = 'sS' if silent else ''  # silent
+    proc = subprocess.run([
+        'curl',
+        '-#',
+        f'-{silent_option}L',
+        url,
+        '--output',
+        filename,
+        '--retry',
+        '9',
+        '-C',
+        '-',])
+    return proc.returncode == 0
+
+
+def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''):
+    # Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes
+    from utils.general import LOGGER
+
+    file = Path(file)
+    assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}"
+    try:  # url1
+        LOGGER.info(f'Downloading {url} to {file}...')
+        torch.hub.download_url_to_file(url, str(file), progress=LOGGER.level <= logging.INFO)
+        assert file.exists() and file.stat().st_size > min_bytes, assert_msg  # check
+    except Exception as e:  # url2
+        if file.exists():
+            file.unlink()  # remove partial downloads
+        LOGGER.info(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...')
+        # curl download, retry and resume on fail
+        curl_download(url2 or url, file)
+    finally:
+        if not file.exists() or file.stat().st_size < min_bytes:  # check
+            if file.exists():
+                file.unlink()  # remove partial downloads
+            LOGGER.info(f'ERROR: {assert_msg}\n{error_msg}')
+        LOGGER.info('')
+
+
+def attempt_download(file, repo='ultralytics/yolov5', release='v7.0'):
+    # Attempt file download from GitHub release assets if not found locally. release = 'latest', 'v7.0', etc.
+    from utils.general import LOGGER
+
+    def github_assets(repository, version='latest'):
+        # Return GitHub repo tag (i.e. 'v7.0') and assets (i.e. ['yolov5s.pt', 'yolov5m.pt', ...])
+        if version != 'latest':
+            version = f'tags/{version}'  # i.e. tags/v7.0
+        response = requests.get(f'https://api.github.com/repos/{repository}/releases/{version}').json()  # github api
+        return response['tag_name'], [x['name'] for x in response['assets']]  # tag, assets
+
+    file = Path(str(file).strip().replace("'", ''))
+    if not file.exists():
+        # URL specified
+        name = Path(urllib.parse.unquote(str(file))).name  # decode '%2F' to '/' etc.
+        if str(file).startswith(('http:/', 'https:/')):  # download
+            url = str(file).replace(':/', '://')  # Pathlib turns :// -> :/
+            file = name.split('?')[0]  # parse authentication https://url.com/file.txt?auth...
+            if Path(file).is_file():
+                LOGGER.info(f'Found {url} locally at {file}')  # file already exists
+            else:
+                safe_download(file=file, url=url, min_bytes=1E5)
+            return file
+
+        # GitHub assets
+        assets = [f'yolov5{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')]  # default
+        try:
+            tag, assets = github_assets(repo, release)
+        except Exception:
+            try:
+                tag, assets = github_assets(repo)  # latest release
+            except Exception:
+                try:
+                    tag = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1]
+                except Exception:
+                    tag = release
+
+        if name in assets:
+            file.parent.mkdir(parents=True, exist_ok=True)  # make parent dir (if required)
+            safe_download(file,
+                          url=f'https://github.com/{repo}/releases/download/{tag}/{name}',
+                          min_bytes=1E5,
+                          error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag}')
+
+    return str(file)
diff --git a/utils/flask_rest_api/README.md b/utils/flask_rest_api/README.md
index 0cdc51be692d..a726acbd9204 100644
--- a/utils/flask_rest_api/README.md
+++ b/utils/flask_rest_api/README.md
@@ -1,9 +1,13 @@
 # Flask REST API
-[REST](https://en.wikipedia.org/wiki/Representational_state_transfer) [API](https://en.wikipedia.org/wiki/API)s are commonly used to expose Machine Learning (ML)  models to other services. This folder contains an example REST API created using Flask to expose the `yolov5s` model from [PyTorch Hub](https://pytorch.org/hub/ultralytics_yolov5/).
+
+[REST](https://en.wikipedia.org/wiki/Representational_state_transfer) [API](https://en.wikipedia.org/wiki/API)s are
+commonly used to expose Machine Learning (ML)  models to other services. This folder contains an example REST API
+created using Flask to expose the YOLOv5s model from [PyTorch Hub](https://pytorch.org/hub/ultralytics_yolov5/).
 
 ## Requirements
 
 [Flask](https://palletsprojects.com/p/flask/) is required. Install with:
+
 ```shell
 $ pip install Flask
 ```
@@ -19,33 +23,51 @@ $ python3 restapi.py --port 5000
 Then use [curl](https://curl.se/) to perform a request:
 
 ```shell
-$ curl -X POST -F image=@zidane.jpg 'http://localhost:5000/v1/object-detection/yolov5s'`
+$ curl -X POST -F image=@zidane.jpg 'http://localhost:5000/v1/object-detection/yolov5s'
 ```
 
-The model inference results are returned:
+The model inference results are returned as a JSON response:
 
-```shell
-[{'class': 0,
-  'confidence': 0.8197850585,
-  'name': 'person',
-  'xmax': 1159.1403808594,
-  'xmin': 750.912902832,
-  'ymax': 711.2583007812,
-  'ymin': 44.0350036621},
- {'class': 0,
-  'confidence': 0.5667674541,
-  'name': 'person',
-  'xmax': 1065.5523681641,
-  'xmin': 116.0448303223,
-  'ymax': 713.8904418945,
-  'ymin': 198.4603881836},
- {'class': 27,
-  'confidence': 0.5661227107,
-  'name': 'tie',
-  'xmax': 516.7975463867,
-  'xmin': 416.6880187988,
-  'ymax': 717.0524902344,
-  'ymin': 429.2020568848}]
+```json
+[
+  {
+    "class": 0,
+    "confidence": 0.8900438547,
+    "height": 0.9318675399,
+    "name": "person",
+    "width": 0.3264600933,
+    "xcenter": 0.7438579798,
+    "ycenter": 0.5207948685
+  },
+  {
+    "class": 0,
+    "confidence": 0.8440024257,
+    "height": 0.7155083418,
+    "name": "person",
+    "width": 0.6546785235,
+    "xcenter": 0.427829951,
+    "ycenter": 0.6334488392
+  },
+  {
+    "class": 27,
+    "confidence": 0.3771208823,
+    "height": 0.3902671337,
+    "name": "tie",
+    "width": 0.0696444362,
+    "xcenter": 0.3675483763,
+    "ycenter": 0.7991207838
+  },
+  {
+    "class": 27,
+    "confidence": 0.3527112305,
+    "height": 0.1540903747,
+    "name": "tie",
+    "width": 0.0336618312,
+    "xcenter": 0.7814827561,
+    "ycenter": 0.5065554976
+  }
+]
 ```
 
-An example python script to perform inference using [requests](https://docs.python-requests.org/en/master/) is given in `example_request.py`
+An example python script to perform inference using [requests](https://docs.python-requests.org/en/master/) is given
+in `example_request.py`
diff --git a/utils/flask_rest_api/example_request.py b/utils/flask_rest_api/example_request.py
index ff21f30f93ca..952e5dcb90fa 100644
--- a/utils/flask_rest_api/example_request.py
+++ b/utils/flask_rest_api/example_request.py
@@ -1,13 +1,19 @@
-"""Perform test request"""
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Perform test request
+"""
+
 import pprint
 
 import requests
 
-DETECTION_URL = "http://localhost:5000/v1/object-detection/yolov5s"
-TEST_IMAGE = "zidane.jpg"
+DETECTION_URL = 'http://localhost:5000/v1/object-detection/yolov5s'
+IMAGE = 'zidane.jpg'
 
-image_data = open(TEST_IMAGE, "rb").read()
+# Read image
+with open(IMAGE, 'rb') as f:
+    image_data = f.read()
 
-response = requests.post(DETECTION_URL, files={"image": image_data}).json()
+response = requests.post(DETECTION_URL, files={'image': image_data}).json()
 
 pprint.pprint(response)
diff --git a/utils/flask_rest_api/restapi.py b/utils/flask_rest_api/restapi.py
index 9d88f618905d..9258b1a68860 100644
--- a/utils/flask_rest_api/restapi.py
+++ b/utils/flask_rest_api/restapi.py
@@ -1,38 +1,48 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
 """
-Run a rest API exposing the yolov5s object detection model
+Run a Flask REST API exposing one or more YOLOv5s models
 """
+
 import argparse
 import io
 
 import torch
-from PIL import Image
 from flask import Flask, request
+from PIL import Image
 
 app = Flask(__name__)
+models = {}
 
-DETECTION_URL = "/v1/object-detection/yolov5s"
+DETECTION_URL = '/v1/object-detection/<model>'
 
 
-@app.route(DETECTION_URL, methods=["POST"])
-def predict():
-    if not request.method == "POST":
+@app.route(DETECTION_URL, methods=['POST'])
+def predict(model):
+    if request.method != 'POST':
         return
 
-    if request.files.get("image"):
-        image_file = request.files["image"]
-        image_bytes = image_file.read()
+    if request.files.get('image'):
+        # Method 1
+        # with request.files["image"] as f:
+        #     im = Image.open(io.BytesIO(f.read()))
+
+        # Method 2
+        im_file = request.files['image']
+        im_bytes = im_file.read()
+        im = Image.open(io.BytesIO(im_bytes))
 
-        img = Image.open(io.BytesIO(image_bytes))
+        if model in models:
+            results = models[model](im, size=640)  # reduce size=320 for faster inference
+            return results.pandas().xyxy[0].to_json(orient='records')
 
-        results = model(img, size=640)
-        data = results.pandas().xyxy[0].to_json(orient="records")
-        return data
 
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Flask API exposing YOLOv5 model')
+    parser.add_argument('--port', default=5000, type=int, help='port number')
+    parser.add_argument('--model', nargs='+', default=['yolov5s'], help='model(s) to run, i.e. --model yolov5n yolov5s')
+    opt = parser.parse_args()
 
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Flask api exposing yolov5 model")
-    parser.add_argument("--port", default=5000, type=int, help="port number")
-    args = parser.parse_args()
+    for m in opt.model:
+        models[m] = torch.hub.load('ultralytics/yolov5', m, force_reload=True, skip_validation=True)
 
-    model = torch.hub.load("ultralytics/yolov5", "yolov5s", force_reload=True).autoshape()  # force_reload to recache
-    app.run(host="0.0.0.0", port=args.port)  # debug=True causes Restarting with stat
+    app.run(host='0.0.0.0', port=opt.port)  # debug=True causes Restarting with stat
diff --git a/utils/general.py b/utils/general.py
old mode 100755
new mode 100644
index bfea231629a9..adb924257162
--- a/utils/general.py
+++ b/utils/general.py
@@ -1,48 +1,272 @@
-# YOLOv5 general utils
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+General utils
+"""
 
+import contextlib
 import glob
+import inspect
 import logging
+import logging.config
 import math
 import os
 import platform
 import random
 import re
+import signal
 import subprocess
+import sys
 import time
+import urllib
+from copy import deepcopy
+from datetime import datetime
 from itertools import repeat
 from multiprocessing.pool import ThreadPool
 from pathlib import Path
+from subprocess import check_output
+from tarfile import is_tarfile
+from typing import Optional
+from zipfile import ZipFile, is_zipfile
 
 import cv2
 import numpy as np
 import pandas as pd
+import pkg_resources as pkg
 import torch
 import torchvision
 import yaml
 
-from utils.google_utils import gsutil_getsize
-from utils.metrics import fitness
-from utils.torch_utils import init_torch_seeds
+from utils import TryExcept, emojis
+from utils.downloads import curl_download, gsutil_getsize
+from utils.metrics import box_iou, fitness
+
+FILE = Path(__file__).resolve()
+ROOT = FILE.parents[1]  # YOLOv5 root directory
+RANK = int(os.getenv('RANK', -1))
 
 # Settings
+NUM_THREADS = min(8, max(1, os.cpu_count() - 1))  # number of YOLOv5 multiprocessing threads
+DATASETS_DIR = Path(os.getenv('YOLOv5_DATASETS_DIR', ROOT.parent / 'datasets'))  # global datasets directory
+AUTOINSTALL = str(os.getenv('YOLOv5_AUTOINSTALL', True)).lower() == 'true'  # global auto-install mode
+VERBOSE = str(os.getenv('YOLOv5_VERBOSE', True)).lower() == 'true'  # global verbose mode
+TQDM_BAR_FORMAT = '{l_bar}{bar:10}{r_bar}'  # tqdm bar format
+FONT = 'Arial.ttf'  # https://ultralytics.com/assets/Arial.ttf
+
 torch.set_printoptions(linewidth=320, precision=5, profile='long')
 np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format})  # format short g, %precision=5
 pd.options.display.max_columns = 10
 cv2.setNumThreads(0)  # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader)
-os.environ['NUMEXPR_MAX_THREADS'] = str(min(os.cpu_count(), 8))  # NumExpr max threads
+os.environ['NUMEXPR_MAX_THREADS'] = str(NUM_THREADS)  # NumExpr max threads
+os.environ['OMP_NUM_THREADS'] = '1' if platform.system() == 'darwin' else str(NUM_THREADS)  # OpenMP (PyTorch and SciPy)
+
+
+def is_ascii(s=''):
+    # Is string composed of all ASCII (no UTF) characters? (note str().isascii() introduced in python 3.7)
+    s = str(s)  # convert list, tuple, None, etc. to str
+    return len(s.encode().decode('ascii', 'ignore')) == len(s)
+
+
+def is_chinese(s='人工智能'):
+    # Is string composed of any Chinese characters?
+    return bool(re.search('[\u4e00-\u9fff]', str(s)))
+
+
+def is_colab():
+    # Is environment a Google Colab instance?
+    return 'google.colab' in sys.modules
+
+
+def is_jupyter():
+    """
+    Check if the current script is running inside a Jupyter Notebook.
+    Verified on Colab, Jupyterlab, Kaggle, Paperspace.
+
+    Returns:
+        bool: True if running inside a Jupyter Notebook, False otherwise.
+    """
+    with contextlib.suppress(Exception):
+        from IPython import get_ipython
+        return get_ipython() is not None
+    return False
+
+
+def is_kaggle():
+    # Is environment a Kaggle Notebook?
+    return os.environ.get('PWD') == '/kaggle/working' and os.environ.get('KAGGLE_URL_BASE') == 'https://www.kaggle.com'
+
+
+def is_docker() -> bool:
+    """Check if the process runs inside a docker container."""
+    if Path('/.dockerenv').exists():
+        return True
+    try:  # check if docker is in control groups
+        with open('/proc/self/cgroup') as file:
+            return any('docker' in line for line in file)
+    except OSError:
+        return False
+
+
+def is_writeable(dir, test=False):
+    # Return True if directory has write permissions, test opening a file with write permissions if test=True
+    if not test:
+        return os.access(dir, os.W_OK)  # possible issues on Windows
+    file = Path(dir) / 'tmp.txt'
+    try:
+        with open(file, 'w'):  # open file with write permissions
+            pass
+        file.unlink()  # remove file
+        return True
+    except OSError:
+        return False
+
+
+LOGGING_NAME = 'yolov5'
+
+
+def set_logging(name=LOGGING_NAME, verbose=True):
+    # sets up logging for the given name
+    rank = int(os.getenv('RANK', -1))  # rank in world for Multi-GPU trainings
+    level = logging.INFO if verbose and rank in {-1, 0} else logging.ERROR
+    logging.config.dictConfig({
+        'version': 1,
+        'disable_existing_loggers': False,
+        'formatters': {
+            name: {
+                'format': '%(message)s'}},
+        'handlers': {
+            name: {
+                'class': 'logging.StreamHandler',
+                'formatter': name,
+                'level': level,}},
+        'loggers': {
+            name: {
+                'level': level,
+                'handlers': [name],
+                'propagate': False,}}})
+
+
+set_logging(LOGGING_NAME)  # run before defining LOGGER
+LOGGER = logging.getLogger(LOGGING_NAME)  # define globally (used in train.py, val.py, detect.py, etc.)
+if platform.system() == 'Windows':
+    for fn in LOGGER.info, LOGGER.warning:
+        setattr(LOGGER, fn.__name__, lambda x: fn(emojis(x)))  # emoji safe logging
+
+
+def user_config_dir(dir='Ultralytics', env_var='YOLOV5_CONFIG_DIR'):
+    # Return path of user configuration directory. Prefer environment variable if exists. Make dir if required.
+    env = os.getenv(env_var)
+    if env:
+        path = Path(env)  # use environment variable
+    else:
+        cfg = {'Windows': 'AppData/Roaming', 'Linux': '.config', 'Darwin': 'Library/Application Support'}  # 3 OS dirs
+        path = Path.home() / cfg.get(platform.system(), '')  # OS-specific config dir
+        path = (path if is_writeable(path) else Path('/tmp')) / dir  # GCP and AWS lambda fix, only /tmp is writeable
+    path.mkdir(exist_ok=True)  # make if required
+    return path
+
+
+CONFIG_DIR = user_config_dir()  # Ultralytics settings dir
+
 
+class Profile(contextlib.ContextDecorator):
+    # YOLOv5 Profile class. Usage: @Profile() decorator or 'with Profile():' context manager
+    def __init__(self, t=0.0):
+        self.t = t
+        self.cuda = torch.cuda.is_available()
 
-def set_logging(rank=-1, verbose=True):
-    logging.basicConfig(
-        format="%(message)s",
-        level=logging.INFO if (verbose and rank in [-1, 0]) else logging.WARN)
+    def __enter__(self):
+        self.start = self.time()
+        return self
 
+    def __exit__(self, type, value, traceback):
+        self.dt = self.time() - self.start  # delta-time
+        self.t += self.dt  # accumulate dt
 
-def init_seeds(seed=0):
-    # Initialize random number generator (RNG) seeds
+    def time(self):
+        if self.cuda:
+            torch.cuda.synchronize()
+        return time.time()
+
+
+class Timeout(contextlib.ContextDecorator):
+    # YOLOv5 Timeout class. Usage: @Timeout(seconds) decorator or 'with Timeout(seconds):' context manager
+    def __init__(self, seconds, *, timeout_msg='', suppress_timeout_errors=True):
+        self.seconds = int(seconds)
+        self.timeout_message = timeout_msg
+        self.suppress = bool(suppress_timeout_errors)
+
+    def _timeout_handler(self, signum, frame):
+        raise TimeoutError(self.timeout_message)
+
+    def __enter__(self):
+        if platform.system() != 'Windows':  # not supported on Windows
+            signal.signal(signal.SIGALRM, self._timeout_handler)  # Set handler for SIGALRM
+            signal.alarm(self.seconds)  # start countdown for SIGALRM to be raised
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if platform.system() != 'Windows':
+            signal.alarm(0)  # Cancel SIGALRM if it's scheduled
+            if self.suppress and exc_type is TimeoutError:  # Suppress TimeoutError
+                return True
+
+
+class WorkingDirectory(contextlib.ContextDecorator):
+    # Usage: @WorkingDirectory(dir) decorator or 'with WorkingDirectory(dir):' context manager
+    def __init__(self, new_dir):
+        self.dir = new_dir  # new dir
+        self.cwd = Path.cwd().resolve()  # current dir
+
+    def __enter__(self):
+        os.chdir(self.dir)
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        os.chdir(self.cwd)
+
+
+def methods(instance):
+    # Get class/instance methods
+    return [f for f in dir(instance) if callable(getattr(instance, f)) and not f.startswith('__')]
+
+
+def print_args(args: Optional[dict] = None, show_file=True, show_func=False):
+    # Print function arguments (optional args dict)
+    x = inspect.currentframe().f_back  # previous frame
+    file, _, func, _, _ = inspect.getframeinfo(x)
+    if args is None:  # get args automatically
+        args, _, _, frm = inspect.getargvalues(x)
+        args = {k: v for k, v in frm.items() if k in args}
+    try:
+        file = Path(file).resolve().relative_to(ROOT).with_suffix('')
+    except ValueError:
+        file = Path(file).stem
+    s = (f'{file}: ' if show_file else '') + (f'{func}: ' if show_func else '')
+    LOGGER.info(colorstr(s) + ', '.join(f'{k}={v}' for k, v in args.items()))
+
+
+def init_seeds(seed=0, deterministic=False):
+    # Initialize random number generator (RNG) seeds https://pytorch.org/docs/stable/notes/randomness.html
     random.seed(seed)
     np.random.seed(seed)
-    init_torch_seeds(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)  # for Multi-GPU, exception safe
+    # torch.backends.cudnn.benchmark = True  # AutoBatch problem https://github.com/ultralytics/yolov5/issues/9287
+    if deterministic and check_version(torch.__version__, '1.12.0'):  # https://github.com/ultralytics/yolov5/pull/8213
+        torch.use_deterministic_algorithms(True)
+        torch.backends.cudnn.deterministic = True
+        os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
+        os.environ['PYTHONHASHSEED'] = str(seed)
+
+
+def intersect_dicts(da, db, exclude=()):
+    # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values
+    return {k: v for k, v in da.items() if k in db and all(x not in k for x in exclude) and v.shape == db[k].shape}
+
+
+def get_default_args(func):
+    # Get func() default arguments
+    signature = inspect.signature(func)
+    return {k: v.default for k, v in signature.parameters.items() if v.default is not inspect.Parameter.empty}
 
 
 def get_latest_run(search_dir='.'):
@@ -51,199 +275,442 @@ def get_latest_run(search_dir='.'):
     return max(last_list, key=os.path.getctime) if last_list else ''
 
 
-def isdocker():
-    # Is environment a Docker container
-    return Path('/workspace').exists()  # or Path('/.dockerenv').exists()
+def file_age(path=__file__):
+    # Return days since last file update
+    dt = (datetime.now() - datetime.fromtimestamp(Path(path).stat().st_mtime))  # delta
+    return dt.days  # + dt.seconds / 86400  # fractional days
 
 
-def emojis(str=''):
-    # Return platform-dependent emoji-safe version of string
-    return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str
+def file_date(path=__file__):
+    # Return human-readable file modification date, i.e. '2021-3-26'
+    t = datetime.fromtimestamp(Path(path).stat().st_mtime)
+    return f'{t.year}-{t.month}-{t.day}'
 
 
-def file_size(file):
-    # Return file size in MB
-    return Path(file).stat().st_size / 1e6
+def file_size(path):
+    # Return file/dir size (MB)
+    mb = 1 << 20  # bytes to MiB (1024 ** 2)
+    path = Path(path)
+    if path.is_file():
+        return path.stat().st_size / mb
+    elif path.is_dir():
+        return sum(f.stat().st_size for f in path.glob('**/*') if f.is_file()) / mb
+    else:
+        return 0.0
 
 
 def check_online():
     # Check internet connectivity
     import socket
-    try:
-        socket.create_connection(("1.1.1.1", 443), 5)  # check host accesability
-        return True
-    except OSError:
-        return False
 
+    def run_once():
+        # Check once
+        try:
+            socket.create_connection(('1.1.1.1', 443), 5)  # check host accessibility
+            return True
+        except OSError:
+            return False
 
-def check_git_status():
-    # Recommend 'git pull' if code is out of date
-    print(colorstr('github: '), end='')
+    return run_once() or run_once()  # check twice to increase robustness to intermittent connectivity issues
+
+
+def git_describe(path=ROOT):  # path must be a directory
+    # Return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe
     try:
-        assert Path('.git').exists(), 'skipping check (not a git repository)'
-        assert not isdocker(), 'skipping check (Docker image)'
-        assert check_online(), 'skipping check (offline)'
-
-        cmd = 'git fetch && git config --get remote.origin.url'
-        url = subprocess.check_output(cmd, shell=True).decode().strip().rstrip('.git')  # github repo url
-        branch = subprocess.check_output('git rev-parse --abbrev-ref HEAD', shell=True).decode().strip()  # checked out
-        n = int(subprocess.check_output(f'git rev-list {branch}..origin/master --count', shell=True))  # commits behind
-        if n > 0:
-            s = f"⚠️ WARNING: code is out of date by {n} commit{'s' * (n > 1)}. " \
-                f"Use 'git pull' to update or 'git clone {url}' to download latest."
-        else:
-            s = f'up to date with {url} ✅'
-        print(emojis(s))  # emoji-safe
-    except Exception as e:
-        print(e)
+        assert (Path(path) / '.git').is_dir()
+        return check_output(f'git -C {path} describe --tags --long --always', shell=True).decode()[:-1]
+    except Exception:
+        return ''
+
+
+@TryExcept()
+@WorkingDirectory(ROOT)
+def check_git_status(repo='ultralytics/yolov5', branch='master'):
+    # YOLOv5 status check, recommend 'git pull' if code is out of date
+    url = f'https://github.com/{repo}'
+    msg = f', for updates see {url}'
+    s = colorstr('github: ')  # string
+    assert Path('.git').exists(), s + 'skipping check (not a git repository)' + msg
+    assert check_online(), s + 'skipping check (offline)' + msg
+
+    splits = re.split(pattern=r'\s', string=check_output('git remote -v', shell=True).decode())
+    matches = [repo in s for s in splits]
+    if any(matches):
+        remote = splits[matches.index(True) - 1]
+    else:
+        remote = 'ultralytics'
+        check_output(f'git remote add {remote} {url}', shell=True)
+    check_output(f'git fetch {remote}', shell=True, timeout=5)  # git fetch
+    local_branch = check_output('git rev-parse --abbrev-ref HEAD', shell=True).decode().strip()  # checked out
+    n = int(check_output(f'git rev-list {local_branch}..{remote}/{branch} --count', shell=True))  # commits behind
+    if n > 0:
+        pull = 'git pull' if remote == 'origin' else f'git pull {remote} {branch}'
+        s += f"⚠️ YOLOv5 is out of date by {n} commit{'s' * (n > 1)}. Use '{pull}' or 'git clone {url}' to update."
+    else:
+        s += f'up to date with {url} ✅'
+    LOGGER.info(s)
 
 
-def check_requirements(requirements='requirements.txt', exclude=()):
-    # Check installed dependencies meet requirements (pass *.txt file or list of packages)
-    import pkg_resources as pkg
+@WorkingDirectory(ROOT)
+def check_git_info(path='.'):
+    # YOLOv5 git info check, return {remote, branch, commit}
+    check_requirements('gitpython')
+    import git
+    try:
+        repo = git.Repo(path)
+        remote = repo.remotes.origin.url.replace('.git', '')  # i.e. 'https://github.com/ultralytics/yolov5'
+        commit = repo.head.commit.hexsha  # i.e. '3134699c73af83aac2a481435550b968d5792c0d'
+        try:
+            branch = repo.active_branch.name  # i.e. 'main'
+        except TypeError:  # not on any branch
+            branch = None  # i.e. 'detached HEAD' state
+        return {'remote': remote, 'branch': branch, 'commit': commit}
+    except git.exc.InvalidGitRepositoryError:  # path is not a git dir
+        return {'remote': None, 'branch': None, 'commit': None}
+
+
+def check_python(minimum='3.7.0'):
+    # Check current python version vs. required python version
+    check_version(platform.python_version(), minimum, name='Python ', hard=True)
+
+
+def check_version(current='0.0.0', minimum='0.0.0', name='version ', pinned=False, hard=False, verbose=False):
+    # Check version vs. required version
+    current, minimum = (pkg.parse_version(x) for x in (current, minimum))
+    result = (current == minimum) if pinned else (current >= minimum)  # bool
+    s = f'WARNING ⚠️ {name}{minimum} is required by YOLOv5, but {name}{current} is currently installed'  # string
+    if hard:
+        assert result, emojis(s)  # assert min requirements met
+    if verbose and not result:
+        LOGGER.warning(s)
+    return result
+
+
+@TryExcept()
+def check_requirements(requirements=ROOT / 'requirements.txt', exclude=(), install=True, cmds=''):
+    # Check installed dependencies meet YOLOv5 requirements (pass *.txt file or list of packages or single package str)
     prefix = colorstr('red', 'bold', 'requirements:')
-    if isinstance(requirements, (str, Path)):  # requirements.txt file
-        file = Path(requirements)
-        if not file.exists():
-            print(f"{prefix} {file.resolve()} not found, check failed.")
-            return
-        requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(file.open()) if x.name not in exclude]
-    else:  # list or tuple of packages
-        requirements = [x for x in requirements if x not in exclude]
-
-    n = 0  # number of packages updates
+    check_python()  # check python version
+    if isinstance(requirements, Path):  # requirements.txt file
+        file = requirements.resolve()
+        assert file.exists(), f'{prefix} {file} not found, check failed.'
+        with file.open() as f:
+            requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(f) if x.name not in exclude]
+    elif isinstance(requirements, str):
+        requirements = [requirements]
+
+    s = ''
+    n = 0
     for r in requirements:
         try:
             pkg.require(r)
-        except Exception as e:  # DistributionNotFound or VersionConflict if requirements not met
+        except (pkg.VersionConflict, pkg.DistributionNotFound):  # exception if requirements not met
+            s += f'"{r}" '
             n += 1
-            print(f"{prefix} {r} not found and is required by YOLOv5, attempting auto-update...")
-            print(subprocess.check_output(f"pip install '{r}'", shell=True).decode())
 
-    if n:  # if packages updated
-        source = file.resolve() if 'file' in locals() else requirements
-        s = f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n" \
-            f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n"
-        print(emojis(s))  # emoji-safe
-
-
-def check_img_size(img_size, s=32):
-    # Verify img_size is a multiple of stride s
-    new_size = make_divisible(img_size, int(s))  # ceil gs-multiple
-    if new_size != img_size:
-        print('WARNING: --img-size %g must be multiple of max stride %g, updating to %g' % (img_size, s, new_size))
+    if s and install and AUTOINSTALL:  # check environment variable
+        LOGGER.info(f"{prefix} YOLOv5 requirement{'s' * (n > 1)} {s}not found, attempting AutoUpdate...")
+        try:
+            # assert check_online(), "AutoUpdate skipped (offline)"
+            LOGGER.info(check_output(f'pip install {s} {cmds}', shell=True).decode())
+            source = file if 'file' in locals() else requirements
+            s = f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n" \
+                f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n"
+            LOGGER.info(s)
+        except Exception as e:
+            LOGGER.warning(f'{prefix} ❌ {e}')
+
+
+def check_img_size(imgsz, s=32, floor=0):
+    # Verify image size is a multiple of stride s in each dimension
+    if isinstance(imgsz, int):  # integer i.e. img_size=640
+        new_size = max(make_divisible(imgsz, int(s)), floor)
+    else:  # list i.e. img_size=[640, 480]
+        imgsz = list(imgsz)  # convert to list if tuple
+        new_size = [max(make_divisible(x, int(s)), floor) for x in imgsz]
+    if new_size != imgsz:
+        LOGGER.warning(f'WARNING ⚠️ --img-size {imgsz} must be multiple of max stride {s}, updating to {new_size}')
     return new_size
 
 
-def check_imshow():
+def check_imshow(warn=False):
     # Check if environment supports image displays
     try:
-        assert not isdocker(), 'cv2.imshow() is disabled in Docker environments'
+        assert not is_jupyter()
+        assert not is_docker()
         cv2.imshow('test', np.zeros((1, 1, 3)))
         cv2.waitKey(1)
         cv2.destroyAllWindows()
         cv2.waitKey(1)
         return True
     except Exception as e:
-        print(f'WARNING: Environment does not support cv2.imshow() or PIL Image.show() image displays\n{e}')
+        if warn:
+            LOGGER.warning(f'WARNING ⚠️ Environment does not support cv2.imshow() or PIL Image.show()\n{e}')
         return False
 
 
-def check_file(file):
-    # Search for file if not found
-    if Path(file).is_file() or file == '':
+def check_suffix(file='yolov5s.pt', suffix=('.pt',), msg=''):
+    # Check file(s) for acceptable suffix
+    if file and suffix:
+        if isinstance(suffix, str):
+            suffix = [suffix]
+        for f in file if isinstance(file, (list, tuple)) else [file]:
+            s = Path(f).suffix.lower()  # file suffix
+            if len(s):
+                assert s in suffix, f'{msg}{f} acceptable suffix is {suffix}'
+
+
+def check_yaml(file, suffix=('.yaml', '.yml')):
+    # Search/download YAML file (if necessary) and return path, checking suffix
+    return check_file(file, suffix)
+
+
+def check_file(file, suffix=''):
+    # Search/download file (if necessary) and return path
+    check_suffix(file, suffix)  # optional
+    file = str(file)  # convert to str()
+    if os.path.isfile(file) or not file:  # exists
         return file
-    else:
-        files = glob.glob('./**/' + file, recursive=True)  # find file
-        assert len(files), f'File Not Found: {file}'  # assert file was found
+    elif file.startswith(('http:/', 'https:/')):  # download
+        url = file  # warning: Pathlib turns :// -> :/
+        file = Path(urllib.parse.unquote(file).split('?')[0]).name  # '%2F' to '/', split https://url.com/file.txt?auth
+        if os.path.isfile(file):
+            LOGGER.info(f'Found {url} locally at {file}')  # file already exists
+        else:
+            LOGGER.info(f'Downloading {url} to {file}...')
+            torch.hub.download_url_to_file(url, file)
+            assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}'  # check
+        return file
+    elif file.startswith('clearml://'):  # ClearML Dataset ID
+        assert 'clearml' in sys.modules, "ClearML is not installed, so cannot use ClearML dataset. Try running 'pip install clearml'."
+        return file
+    else:  # search
+        files = []
+        for d in 'data', 'models', 'utils':  # search directories
+            files.extend(glob.glob(str(ROOT / d / '**' / file), recursive=True))  # find file
+        assert len(files), f'File not found: {file}'  # assert file was found
         assert len(files) == 1, f"Multiple files match '{file}', specify exact path: {files}"  # assert unique
         return files[0]  # return file
 
 
-def check_dataset(dict):
-    # Download dataset if not found locally
-    val, s = dict.get('val'), dict.get('download')
-    if val and len(val):
+def check_font(font=FONT, progress=False):
+    # Download font to CONFIG_DIR if necessary
+    font = Path(font)
+    file = CONFIG_DIR / font.name
+    if not font.exists() and not file.exists():
+        url = f'https://ultralytics.com/assets/{font.name}'
+        LOGGER.info(f'Downloading {url} to {file}...')
+        torch.hub.download_url_to_file(url, str(file), progress=progress)
+
+
+def check_dataset(data, autodownload=True):
+    # Download, check and/or unzip dataset if not found locally
+
+    # Download (optional)
+    extract_dir = ''
+    if isinstance(data, (str, Path)) and (is_zipfile(data) or is_tarfile(data)):
+        download(data, dir=f'{DATASETS_DIR}/{Path(data).stem}', unzip=True, delete=False, curl=False, threads=1)
+        data = next((DATASETS_DIR / Path(data).stem).rglob('*.yaml'))
+        extract_dir, autodownload = data.parent, False
+
+    # Read yaml (optional)
+    if isinstance(data, (str, Path)):
+        data = yaml_load(data)  # dictionary
+
+    # Checks
+    for k in 'train', 'val', 'names':
+        assert k in data, emojis(f"data.yaml '{k}:' field missing ❌")
+    if isinstance(data['names'], (list, tuple)):  # old array format
+        data['names'] = dict(enumerate(data['names']))  # convert to dict
+    assert all(isinstance(k, int) for k in data['names'].keys()), 'data.yaml names keys must be integers, i.e. 2: car'
+    data['nc'] = len(data['names'])
+
+    # Resolve paths
+    path = Path(extract_dir or data.get('path') or '')  # optional 'path' default to '.'
+    if not path.is_absolute():
+        path = (ROOT / path).resolve()
+        data['path'] = path  # download scripts
+    for k in 'train', 'val', 'test':
+        if data.get(k):  # prepend path
+            if isinstance(data[k], str):
+                x = (path / data[k]).resolve()
+                if not x.exists() and data[k].startswith('../'):
+                    x = (path / data[k][3:]).resolve()
+                data[k] = str(x)
+            else:
+                data[k] = [str((path / x).resolve()) for x in data[k]]
+
+    # Parse yaml
+    train, val, test, s = (data.get(x) for x in ('train', 'val', 'test', 'download'))
+    if val:
         val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])]  # val path
         if not all(x.exists() for x in val):
-            print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()])
-            if s and len(s):  # download script
-                if s.startswith('http') and s.endswith('.zip'):  # URL
-                    f = Path(s).name  # filename
-                    print(f'Downloading {s} ...')
-                    torch.hub.download_url_to_file(s, f)
-                    r = os.system(f'unzip -q {f} -d ../ && rm {f}')  # unzip
-                elif s.startswith('bash '):  # bash script
-                    print(f'Running {s} ...')
-                    r = os.system(s)
-                else:  # python script
-                    r = exec(s)  # return None
-                print('Dataset autodownload %s\n' % ('success' if r in (0, None) else 'failure'))  # print result
-            else:
-                raise Exception('Dataset not found.')
+            LOGGER.info('\nDataset not found ⚠️, missing paths %s' % [str(x) for x in val if not x.exists()])
+            if not s or not autodownload:
+                raise Exception('Dataset not found ❌')
+            t = time.time()
+            if s.startswith('http') and s.endswith('.zip'):  # URL
+                f = Path(s).name  # filename
+                LOGGER.info(f'Downloading {s} to {f}...')
+                torch.hub.download_url_to_file(s, f)
+                Path(DATASETS_DIR).mkdir(parents=True, exist_ok=True)  # create root
+                unzip_file(f, path=DATASETS_DIR)  # unzip
+                Path(f).unlink()  # remove zip
+                r = None  # success
+            elif s.startswith('bash '):  # bash script
+                LOGGER.info(f'Running {s} ...')
+                r = subprocess.run(s, shell=True)
+            else:  # python script
+                r = exec(s, {'yaml': data})  # return None
+            dt = f'({round(time.time() - t, 1)}s)'
+            s = f"success ✅ {dt}, saved to {colorstr('bold', DATASETS_DIR)}" if r in (0, None) else f'failure {dt} ❌'
+            LOGGER.info(f'Dataset download {s}')
+    check_font('Arial.ttf' if is_ascii(data['names']) else 'Arial.Unicode.ttf', progress=True)  # download fonts
+    return data  # dictionary
+
+
+def check_amp(model):
+    # Check PyTorch Automatic Mixed Precision (AMP) functionality. Return True on correct operation
+    from models.common import AutoShape, DetectMultiBackend
+
+    def amp_allclose(model, im):
+        # All close FP32 vs AMP results
+        m = AutoShape(model, verbose=False)  # model
+        a = m(im).xywhn[0]  # FP32 inference
+        m.amp = True
+        b = m(im).xywhn[0]  # AMP inference
+        return a.shape == b.shape and torch.allclose(a, b, atol=0.1)  # close to 10% absolute tolerance
+
+    prefix = colorstr('AMP: ')
+    device = next(model.parameters()).device  # get model device
+    if device.type in ('cpu', 'mps'):
+        return False  # AMP only used on CUDA devices
+    f = ROOT / 'data' / 'images' / 'bus.jpg'  # image to check
+    im = f if f.exists() else 'https://ultralytics.com/images/bus.jpg' if check_online() else np.ones((640, 640, 3))
+    try:
+        assert amp_allclose(deepcopy(model), im) or amp_allclose(DetectMultiBackend('yolov5n.pt', device), im)
+        LOGGER.info(f'{prefix}checks passed ✅')
+        return True
+    except Exception:
+        help_url = 'https://github.com/ultralytics/yolov5/issues/7908'
+        LOGGER.warning(f'{prefix}checks failed ❌, disabling Automatic Mixed Precision. See {help_url}')
+        return False
 
 
-def download(url, dir='.', multi_thread=False):
-    # Multi-threaded file download and unzip function
+def yaml_load(file='data.yaml'):
+    # Single-line safe yaml loading
+    with open(file, errors='ignore') as f:
+        return yaml.safe_load(f)
+
+
+def yaml_save(file='data.yaml', data={}):
+    # Single-line safe yaml saving
+    with open(file, 'w') as f:
+        yaml.safe_dump({k: str(v) if isinstance(v, Path) else v for k, v in data.items()}, f, sort_keys=False)
+
+
+def unzip_file(file, path=None, exclude=('.DS_Store', '__MACOSX')):
+    # Unzip a *.zip file to path/, excluding files containing strings in exclude list
+    if path is None:
+        path = Path(file).parent  # default path
+    with ZipFile(file) as zipObj:
+        for f in zipObj.namelist():  # list all archived filenames in the zip
+            if all(x not in f for x in exclude):
+                zipObj.extract(f, path=path)
+
+
+def url2file(url):
+    # Convert URL to filename, i.e. https://url.com/file.txt?auth -> file.txt
+    url = str(Path(url)).replace(':/', '://')  # Pathlib turns :// -> :/
+    return Path(urllib.parse.unquote(url)).name.split('?')[0]  # '%2F' to '/', split https://url.com/file.txt?auth
+
+
+def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1, retry=3):
+    # Multithreaded file download and unzip function, used in data.yaml for autodownload
     def download_one(url, dir):
         # Download 1 file
-        f = dir / Path(url).name  # filename
-        if not f.exists():
-            print(f'Downloading {url} to {f}...')
-            torch.hub.download_url_to_file(url, f, progress=True)  # download
-        if f.suffix in ('.zip', '.gz'):
-            print(f'Unzipping {f}...')
-            if f.suffix == '.zip':
-                os.system(f'unzip -qo {f} -d {dir} && rm {f}')  # unzip -quiet -overwrite
+        success = True
+        if os.path.isfile(url):
+            f = Path(url)  # filename
+        else:  # does not exist
+            f = dir / Path(url).name
+            LOGGER.info(f'Downloading {url} to {f}...')
+            for i in range(retry + 1):
+                if curl:
+                    success = curl_download(url, f, silent=(threads > 1))
+                else:
+                    torch.hub.download_url_to_file(url, f, progress=threads == 1)  # torch download
+                    success = f.is_file()
+                if success:
+                    break
+                elif i < retry:
+                    LOGGER.warning(f'⚠️ Download failure, retrying {i + 1}/{retry} {url}...')
+                else:
+                    LOGGER.warning(f'❌ Failed to download {url}...')
+
+        if unzip and success and (f.suffix == '.gz' or is_zipfile(f) or is_tarfile(f)):
+            LOGGER.info(f'Unzipping {f}...')
+            if is_zipfile(f):
+                unzip_file(f, dir)  # unzip
+            elif is_tarfile(f):
+                subprocess.run(['tar', 'xf', f, '--directory', f.parent], check=True)  # unzip
             elif f.suffix == '.gz':
-                os.system(f'tar xfz {f} --directory {f.parent} && rm {f}')  # unzip
+                subprocess.run(['tar', 'xfz', f, '--directory', f.parent], check=True)  # unzip
+            if delete:
+                f.unlink()  # remove zip
 
     dir = Path(dir)
     dir.mkdir(parents=True, exist_ok=True)  # make directory
-    if multi_thread:
-        ThreadPool(8).imap(lambda x: download_one(*x), zip(url, repeat(dir)))  # 8 threads
+    if threads > 1:
+        pool = ThreadPool(threads)
+        pool.imap(lambda x: download_one(*x), zip(url, repeat(dir)))  # multithreaded
+        pool.close()
+        pool.join()
     else:
-        for u in tuple(url) if isinstance(url, str) else url:
+        for u in [url] if isinstance(url, (str, Path)) else url:
             download_one(u, dir)
 
 
 def make_divisible(x, divisor):
-    # Returns x evenly divisible by divisor
+    # Returns nearest x divisible by divisor
+    if isinstance(divisor, torch.Tensor):
+        divisor = int(divisor.max())  # to int
     return math.ceil(x / divisor) * divisor
 
 
 def clean_str(s):
     # Cleans a string by replacing special characters with underscore _
-    return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s)
+    return re.sub(pattern='[|@#!¡·$€%&()=?¿^*;:,¨´><+]', repl='_', string=s)
 
 
 def one_cycle(y1=0.0, y2=1.0, steps=100):
-    # lambda function for sinusoidal ramp from y1 to y2
+    # lambda function for sinusoidal ramp from y1 to y2 https://arxiv.org/pdf/1812.01187.pdf
     return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1
 
 
 def colorstr(*input):
     # Colors a string https://en.wikipedia.org/wiki/ANSI_escape_code, i.e.  colorstr('blue', 'hello world')
     *args, string = input if len(input) > 1 else ('blue', 'bold', input[0])  # color arguments, string
-    colors = {'black': '\033[30m',  # basic colors
-              'red': '\033[31m',
-              'green': '\033[32m',
-              'yellow': '\033[33m',
-              'blue': '\033[34m',
-              'magenta': '\033[35m',
-              'cyan': '\033[36m',
-              'white': '\033[37m',
-              'bright_black': '\033[90m',  # bright colors
-              'bright_red': '\033[91m',
-              'bright_green': '\033[92m',
-              'bright_yellow': '\033[93m',
-              'bright_blue': '\033[94m',
-              'bright_magenta': '\033[95m',
-              'bright_cyan': '\033[96m',
-              'bright_white': '\033[97m',
-              'end': '\033[0m',  # misc
-              'bold': '\033[1m',
-              'underline': '\033[4m'}
+    colors = {
+        'black': '\033[30m',  # basic colors
+        'red': '\033[31m',
+        'green': '\033[32m',
+        'yellow': '\033[33m',
+        'blue': '\033[34m',
+        'magenta': '\033[35m',
+        'cyan': '\033[36m',
+        'white': '\033[37m',
+        'bright_black': '\033[90m',  # bright colors
+        'bright_red': '\033[91m',
+        'bright_green': '\033[92m',
+        'bright_yellow': '\033[93m',
+        'bright_blue': '\033[94m',
+        'bright_magenta': '\033[95m',
+        'bright_cyan': '\033[96m',
+        'bright_white': '\033[97m',
+        'end': '\033[0m',  # misc
+        'bold': '\033[1m',
+        'underline': '\033[4m'}
     return ''.join(colors[x] for x in args) + f'{string}' + colors['end']
 
 
@@ -253,7 +720,7 @@ def labels_to_class_weights(labels, nc=80):
         return torch.Tensor()
 
     labels = np.concatenate(labels, 0)  # labels.shape = (866643, 5) for COCO
-    classes = labels[:, 0].astype(np.int)  # labels = [class xywh]
+    classes = labels[:, 0].astype(int)  # labels = [class xywh]
     weights = np.bincount(classes, minlength=nc)  # occurrences per class
 
     # Prepend gridpoint count (for uCE training)
@@ -263,15 +730,14 @@ def labels_to_class_weights(labels, nc=80):
     weights[weights == 0] = 1  # replace empty bins with 1
     weights = 1 / weights  # number of targets per class
     weights /= weights.sum()  # normalize
-    return torch.from_numpy(weights)
+    return torch.from_numpy(weights).float()
 
 
 def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)):
     # Produces image weights based on class_weights and image contents
-    class_counts = np.array([np.bincount(x[:, 0].astype(np.int), minlength=nc) for x in labels])
-    image_weights = (class_weights.reshape(1, nc) * class_counts).sum(1)
-    # index = random.choices(range(n), weights=image_weights, k=1)  # weight image sample
-    return image_weights
+    # Usage: index = random.choices(range(n), weights=image_weights, k=1)  # weighted image sample
+    class_counts = np.array([np.bincount(x[:, 0].astype(int), minlength=nc) for x in labels])
+    return (class_weights.reshape(1, nc) * class_counts).sum(1)
 
 
 def coco80_to_coco91_class():  # converts 80-index (val2014) to 91-index (paper)
@@ -280,47 +746,59 @@ def coco80_to_coco91_class():  # converts 80-index (val2014) to 91-index (paper)
     # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n')
     # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)]  # darknet to coco
     # x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)]  # coco to darknet
-    x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
-         35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-         64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
-    return x
+    return [
+        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
+        35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+        64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
 
 
 def xyxy2xywh(x):
     # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
-    y[:, 0] = (x[:, 0] + x[:, 2]) / 2  # x center
-    y[:, 1] = (x[:, 1] + x[:, 3]) / 2  # y center
-    y[:, 2] = x[:, 2] - x[:, 0]  # width
-    y[:, 3] = x[:, 3] - x[:, 1]  # height
+    y[..., 0] = (x[..., 0] + x[..., 2]) / 2  # x center
+    y[..., 1] = (x[..., 1] + x[..., 3]) / 2  # y center
+    y[..., 2] = x[..., 2] - x[..., 0]  # width
+    y[..., 3] = x[..., 3] - x[..., 1]  # height
     return y
 
 
 def xywh2xyxy(x):
     # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
-    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
-    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
-    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
-    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
+    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x
+    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top left y
+    y[..., 2] = x[..., 0] + x[..., 2] / 2  # bottom right x
+    y[..., 3] = x[..., 1] + x[..., 3] / 2  # bottom right y
     return y
 
 
 def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
     # Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
-    y[:, 0] = w * (x[:, 0] - x[:, 2] / 2) + padw  # top left x
-    y[:, 1] = h * (x[:, 1] - x[:, 3] / 2) + padh  # top left y
-    y[:, 2] = w * (x[:, 0] + x[:, 2] / 2) + padw  # bottom right x
-    y[:, 3] = h * (x[:, 1] + x[:, 3] / 2) + padh  # bottom right y
+    y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw  # top left x
+    y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh  # top left y
+    y[..., 2] = w * (x[..., 0] + x[..., 2] / 2) + padw  # bottom right x
+    y[..., 3] = h * (x[..., 1] + x[..., 3] / 2) + padh  # bottom right y
+    return y
+
+
+def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
+    # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right
+    if clip:
+        clip_boxes(x, (h - eps, w - eps))  # warning: inplace clip
+    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
+    y[..., 0] = ((x[..., 0] + x[..., 2]) / 2) / w  # x center
+    y[..., 1] = ((x[..., 1] + x[..., 3]) / 2) / h  # y center
+    y[..., 2] = (x[..., 2] - x[..., 0]) / w  # width
+    y[..., 3] = (x[..., 3] - x[..., 1]) / h  # height
     return y
 
 
 def xyn2xy(x, w=640, h=640, padw=0, padh=0):
     # Convert normalized segments into pixel segments, shape (n,2)
     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
-    y[:, 0] = w * x[:, 0] + padw  # top left x
-    y[:, 1] = h * x[:, 1] + padh  # top left y
+    y[..., 0] = w * x[..., 0] + padw  # top left x
+    y[..., 1] = h * x[..., 1] + padh  # top left y
     return y
 
 
@@ -344,14 +822,15 @@ def segments2boxes(segments):
 def resample_segments(segments, n=1000):
     # Up-sample an (n,2) segment
     for i, s in enumerate(segments):
+        s = np.concatenate((s, s[0:1, :]), axis=0)
         x = np.linspace(0, len(s) - 1, n)
         xp = np.arange(len(s))
         segments[i] = np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)]).reshape(2, -1).T  # segment xy
     return segments
 
 
-def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
-    # Rescale coords (xyxy) from img1_shape to img0_shape
+def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
+    # Rescale boxes (xyxy) from img1_shape to img0_shape
     if ratio_pad is None:  # calculate from img0_shape
         gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain  = old / new
         pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
@@ -359,123 +838,97 @@ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
         gain = ratio_pad[0][0]
         pad = ratio_pad[1]
 
-    coords[:, [0, 2]] -= pad[0]  # x padding
-    coords[:, [1, 3]] -= pad[1]  # y padding
-    coords[:, :4] /= gain
-    clip_coords(coords, img0_shape)
-    return coords
-
-
-def clip_coords(boxes, img_shape):
-    # Clip bounding xyxy bounding boxes to image shape (height, width)
-    boxes[:, 0].clamp_(0, img_shape[1])  # x1
-    boxes[:, 1].clamp_(0, img_shape[0])  # y1
-    boxes[:, 2].clamp_(0, img_shape[1])  # x2
-    boxes[:, 3].clamp_(0, img_shape[0])  # y2
-
-
-def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
-    # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
-    box2 = box2.T
-
-    # Get the coordinates of bounding boxes
-    if x1y1x2y2:  # x1, y1, x2, y2 = box1
-        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
-        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
-    else:  # transform from xywh to xyxy
-        b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
-        b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
-        b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
-        b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
-
-    # Intersection area
-    inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
-            (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
-
-    # Union Area
-    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
-    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
-    union = w1 * h1 + w2 * h2 - inter + eps
-
-    iou = inter / union
-    if GIoU or DIoU or CIoU:
-        cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)  # convex (smallest enclosing box) width
-        ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)  # convex height
-        if CIoU or DIoU:  # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
-            c2 = cw ** 2 + ch ** 2 + eps  # convex diagonal squared
-            rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 +
-                    (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4  # center distance squared
-            if DIoU:
-                return iou - rho2 / c2  # DIoU
-            elif CIoU:  # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
-                v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
-                with torch.no_grad():
-                    alpha = v / (v - iou + (1 + eps))
-                return iou - (rho2 / c2 + v * alpha)  # CIoU
-        else:  # GIoU https://arxiv.org/pdf/1902.09630.pdf
-            c_area = cw * ch + eps  # convex area
-            return iou - (c_area - union) / c_area  # GIoU
-    else:
-        return iou  # IoU
+    boxes[..., [0, 2]] -= pad[0]  # x padding
+    boxes[..., [1, 3]] -= pad[1]  # y padding
+    boxes[..., :4] /= gain
+    clip_boxes(boxes, img0_shape)
+    return boxes
 
 
-def box_iou(box1, box2):
-    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
-    """
-    Return intersection-over-union (Jaccard index) of boxes.
-    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
-    Arguments:
-        box1 (Tensor[N, 4])
-        box2 (Tensor[M, 4])
-    Returns:
-        iou (Tensor[N, M]): the NxM matrix containing the pairwise
-            IoU values for every element in boxes1 and boxes2
-    """
-
-    def box_area(box):
-        # box = 4xn
-        return (box[2] - box[0]) * (box[3] - box[1])
-
-    area1 = box_area(box1.T)
-    area2 = box_area(box2.T)
-
-    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
-    inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
-    return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)
-
+def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=False):
+    # Rescale coords (xyxy) from img1_shape to img0_shape
+    if ratio_pad is None:  # calculate from img0_shape
+        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain  = old / new
+        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
+    else:
+        gain = ratio_pad[0][0]
+        pad = ratio_pad[1]
 
-def wh_iou(wh1, wh2):
-    # Returns the nxm IoU matrix. wh1 is nx2, wh2 is mx2
-    wh1 = wh1[:, None]  # [N,1,2]
-    wh2 = wh2[None]  # [1,M,2]
-    inter = torch.min(wh1, wh2).prod(2)  # [N,M]
-    return inter / (wh1.prod(2) + wh2.prod(2) - inter)  # iou = inter / (area1 + area2 - inter)
+    segments[:, 0] -= pad[0]  # x padding
+    segments[:, 1] -= pad[1]  # y padding
+    segments /= gain
+    clip_segments(segments, img0_shape)
+    if normalize:
+        segments[:, 0] /= img0_shape[1]  # width
+        segments[:, 1] /= img0_shape[0]  # height
+    return segments
 
 
-def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
-                        labels=()):
-    """Runs Non-Maximum Suppression (NMS) on inference results
+def clip_boxes(boxes, shape):
+    # Clip boxes (xyxy) to image shape (height, width)
+    if isinstance(boxes, torch.Tensor):  # faster individually
+        boxes[..., 0].clamp_(0, shape[1])  # x1
+        boxes[..., 1].clamp_(0, shape[0])  # y1
+        boxes[..., 2].clamp_(0, shape[1])  # x2
+        boxes[..., 3].clamp_(0, shape[0])  # y2
+    else:  # np.array (faster grouped)
+        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])  # x1, x2
+        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])  # y1, y2
+
+
+def clip_segments(segments, shape):
+    # Clip segments (xy1,xy2,...) to image shape (height, width)
+    if isinstance(segments, torch.Tensor):  # faster individually
+        segments[:, 0].clamp_(0, shape[1])  # x
+        segments[:, 1].clamp_(0, shape[0])  # y
+    else:  # np.array (faster grouped)
+        segments[:, 0] = segments[:, 0].clip(0, shape[1])  # x
+        segments[:, 1] = segments[:, 1].clip(0, shape[0])  # y
+
+
+def non_max_suppression(
+        prediction,
+        conf_thres=0.25,
+        iou_thres=0.45,
+        classes=None,
+        agnostic=False,
+        multi_label=False,
+        labels=(),
+        max_det=300,
+        nm=0,  # number of masks
+):
+    """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections
 
     Returns:
          list of detections, on (n,6) tensor per image [xyxy, conf, cls]
     """
 
-    nc = prediction.shape[2] - 5  # number of classes
+    # Checks
+    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
+    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
+    if isinstance(prediction, (list, tuple)):  # YOLOv5 model in validation model, output = (inference_out, loss_out)
+        prediction = prediction[0]  # select only inference output
+
+    device = prediction.device
+    mps = 'mps' in device.type  # Apple MPS
+    if mps:  # MPS not fully supported yet, convert tensors to CPU before NMS
+        prediction = prediction.cpu()
+    bs = prediction.shape[0]  # batch size
+    nc = prediction.shape[2] - nm - 5  # number of classes
     xc = prediction[..., 4] > conf_thres  # candidates
 
     # Settings
-    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
-    max_det = 300  # maximum number of detections per image
+    # min_wh = 2  # (pixels) minimum box width and height
+    max_wh = 7680  # (pixels) maximum box width and height
     max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
-    time_limit = 10.0  # seconds to quit after
+    time_limit = 0.5 + 0.05 * bs  # seconds to quit after
     redundant = True  # require redundant detections
     multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
     merge = False  # use merge-NMS
 
     t = time.time()
-    if not isinstance(prediction, torch.Tensor):
-        prediction = torch.as_tensor(prediction)
-    output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
+    mi = 5 + nc  # mask start index
+    output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
     for xi, x in enumerate(prediction):  # image index, image inference
         # Apply constraints
         # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
@@ -483,11 +936,11 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non
 
         # Cat apriori labels if autolabelling
         if labels and len(labels[xi]):
-            l = labels[xi]
-            v = torch.zeros((len(l), nc + 5), device=x.device)
-            v[:, :4] = l[:, 1:5]  # box
+            lb = labels[xi]
+            v = torch.zeros((len(lb), nc + nm + 5), device=x.device)
+            v[:, :4] = lb[:, 1:5]  # box
             v[:, 4] = 1.0  # conf
-            v[range(len(l)), l[:, 0].long() + 5] = 1.0  # cls
+            v[range(len(lb)), lb[:, 0].long() + 5] = 1.0  # cls
             x = torch.cat((x, v), 0)
 
         # If none remain process next image
@@ -497,16 +950,17 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non
         # Compute conf
         x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf
 
-        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
-        box = xywh2xyxy(x[:, :4])
+        # Box/Mask
+        box = xywh2xyxy(x[:, :4])  # center_x, center_y, width, height) to (x1, y1, x2, y2)
+        mask = x[:, mi:]  # zero columns if no masks
 
         # Detections matrix nx6 (xyxy, conf, cls)
         if multi_label:
-            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
-            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
+            i, j = (x[:, 5:mi] > conf_thres).nonzero(as_tuple=False).T
+            x = torch.cat((box[i], x[i, 5 + j, None], j[:, None].float(), mask[i]), 1)
         else:  # best class only
-            conf, j = x[:, 5:].max(1, keepdim=True)
-            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
+            conf, j = x[:, 5:mi].max(1, keepdim=True)
+            x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]
 
         # Filter by class
         if classes is not None:
@@ -520,15 +974,13 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non
         n = x.shape[0]  # number of boxes
         if not n:  # no boxes
             continue
-        elif n > max_nms:  # excess boxes
-            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence
+        x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence and remove excess boxes
 
         # Batched NMS
         c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
         boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
         i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
-        if i.shape[0] > max_det:  # limit detections
-            i = i[:max_det]
+        i = i[:max_det]  # limit detections
         if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
             # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
             iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
@@ -538,8 +990,10 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non
                 i = i[iou.sum(1) > 1]  # require redundancy
 
         output[xi] = x[i]
+        if mps:
+            output[xi] = output[xi].to(device)
         if (time.time() - t) > time_limit:
-            print(f'WARNING: NMS time limit {time_limit}s exceeded')
+            LOGGER.warning(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded')
             break  # time limit exceeded
 
     return output
@@ -550,7 +1004,7 @@ def strip_optimizer(f='best.pt', s=''):  # from utils.general import *; strip_op
     x = torch.load(f, map_location=torch.device('cpu'))
     if x.get('ema'):
         x['model'] = x['ema']  # replace model with ema
-    for k in 'optimizer', 'training_results', 'wandb_id', 'ema', 'updates':  # keys
+    for k in 'optimizer', 'best_fitness', 'ema', 'updates':  # keys
         x[k] = None
     x['epoch'] = -1
     x['model'].half()  # to FP16
@@ -558,42 +1012,51 @@ def strip_optimizer(f='best.pt', s=''):  # from utils.general import *; strip_op
         p.requires_grad = False
     torch.save(x, s or f)
     mb = os.path.getsize(s or f) / 1E6  # filesize
-    print(f"Optimizer stripped from {f},{(' saved as %s,' % s) if s else ''} {mb:.1f}MB")
+    LOGGER.info(f"Optimizer stripped from {f},{f' saved as {s},' if s else ''} {mb:.1f}MB")
 
 
-def print_mutation(hyp, results, yaml_file='hyp_evolved.yaml', bucket=''):
-    # Print mutation results to evolve.txt (for use with train.py --evolve)
-    a = '%10s' * len(hyp) % tuple(hyp.keys())  # hyperparam keys
-    b = '%10.3g' * len(hyp) % tuple(hyp.values())  # hyperparam values
-    c = '%10.4g' * len(results) % results  # results (P, R, mAP@0.5, mAP@0.5:0.95, val_losses x 3)
-    print('\n%s\n%s\nEvolved fitness: %s\n' % (a, b, c))
+def print_mutation(keys, results, hyp, save_dir, bucket, prefix=colorstr('evolve: ')):
+    evolve_csv = save_dir / 'evolve.csv'
+    evolve_yaml = save_dir / 'hyp_evolve.yaml'
+    keys = tuple(keys) + tuple(hyp.keys())  # [results + hyps]
+    keys = tuple(x.strip() for x in keys)
+    vals = results + tuple(hyp.values())
+    n = len(keys)
 
+    # Download (optional)
     if bucket:
-        url = 'gs://%s/evolve.txt' % bucket
-        if gsutil_getsize(url) > (os.path.getsize('evolve.txt') if os.path.exists('evolve.txt') else 0):
-            os.system('gsutil cp %s .' % url)  # download evolve.txt if larger than local
+        url = f'gs://{bucket}/evolve.csv'
+        if gsutil_getsize(url) > (evolve_csv.stat().st_size if evolve_csv.exists() else 0):
+            subprocess.run(['gsutil', 'cp', f'{url}', f'{save_dir}'])  # download evolve.csv if larger than local
 
-    with open('evolve.txt', 'a') as f:  # append result
-        f.write(c + b + '\n')
-    x = np.unique(np.loadtxt('evolve.txt', ndmin=2), axis=0)  # load unique rows
-    x = x[np.argsort(-fitness(x))]  # sort
-    np.savetxt('evolve.txt', x, '%10.3g')  # save sort by fitness
+    # Log to evolve.csv
+    s = '' if evolve_csv.exists() else (('%20s,' * n % keys).rstrip(',') + '\n')  # add header
+    with open(evolve_csv, 'a') as f:
+        f.write(s + ('%20.5g,' * n % vals).rstrip(',') + '\n')
 
     # Save yaml
-    for i, k in enumerate(hyp.keys()):
-        hyp[k] = float(x[0, i + 7])
-    with open(yaml_file, 'w') as f:
-        results = tuple(x[0, :7])
-        c = '%10.4g' * len(results) % results  # results (P, R, mAP@0.5, mAP@0.5:0.95, val_losses x 3)
-        f.write('# Hyperparameter Evolution Results\n# Generations: %g\n# Metrics: ' % len(x) + c + '\n\n')
-        yaml.safe_dump(hyp, f, sort_keys=False)
+    with open(evolve_yaml, 'w') as f:
+        data = pd.read_csv(evolve_csv, skipinitialspace=True)
+        data = data.rename(columns=lambda x: x.strip())  # strip keys
+        i = np.argmax(fitness(data.values[:, :4]))  #
+        generations = len(data)
+        f.write('# YOLOv5 Hyperparameter Evolution Results\n' + f'# Best generation: {i}\n' +
+                f'# Last generation: {generations - 1}\n' + '# ' + ', '.join(f'{x.strip():>20s}' for x in keys[:7]) +
+                '\n' + '# ' + ', '.join(f'{x:>20.5g}' for x in data.values[i, :7]) + '\n\n')
+        yaml.safe_dump(data.loc[i][7:].to_dict(), f, sort_keys=False)
+
+    # Print to screen
+    LOGGER.info(prefix + f'{generations} generations finished, current result:\n' + prefix +
+                ', '.join(f'{x.strip():>20s}' for x in keys) + '\n' + prefix + ', '.join(f'{x:20.5g}'
+                                                                                         for x in vals) + '\n\n')
 
     if bucket:
-        os.system('gsutil cp evolve.txt %s gs://%s' % (yaml_file, bucket))  # upload
+        subprocess.run(['gsutil', 'cp', f'{evolve_csv}', f'{evolve_yaml}', f'gs://{bucket}'])  # upload
 
 
 def apply_classifier(x, model, img, im0):
-    # Apply a second stage classifier to yolo outputs
+    # Apply a second stage classifier to YOLO outputs
+    # Example model = torchvision.models.__dict__['efficientnet_b0'](pretrained=True).to(device).eval()
     im0 = [im0] if isinstance(im0, np.ndarray) else im0
     for i, d in enumerate(x):  # per image
         if d is not None and len(d):
@@ -606,19 +1069,18 @@ def apply_classifier(x, model, img, im0):
             d[:, :4] = xywh2xyxy(b).long()
 
             # Rescale boxes from img_size to im0 size
-            scale_coords(img.shape[2:], d[:, :4], im0[i].shape)
+            scale_boxes(img.shape[2:], d[:, :4], im0[i].shape)
 
             # Classes
             pred_cls1 = d[:, 5].long()
             ims = []
-            for j, a in enumerate(d):  # per item
+            for a in d:
                 cutout = im0[i][int(a[1]):int(a[3]), int(a[0]):int(a[2])]
                 im = cv2.resize(cutout, (224, 224))  # BGR
-                # cv2.imwrite('test%i.jpg' % j, cutout)
 
                 im = im[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
                 im = np.ascontiguousarray(im, dtype=np.float32)  # uint8 to float32
-                im /= 255.0  # 0 - 255 to 0.0 - 1.0
+                im /= 255  # 0 - 255 to 0.0 - 1.0
                 ims.append(im)
 
             pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1)  # classifier prediction
@@ -627,31 +1089,52 @@ def apply_classifier(x, model, img, im0):
     return x
 
 
-def save_one_box(xyxy, im, file='image.jpg', gain=1.02, pad=10, square=False, BGR=False):
-    # Save an image crop as {file} with crop size multiplied by {gain} and padded by {pad} pixels
-    xyxy = torch.tensor(xyxy).view(-1, 4)
-    b = xyxy2xywh(xyxy)  # boxes
-    if square:
-        b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1)  # attempt rectangle to square
-    b[:, 2:] = b[:, 2:] * gain + pad  # box wh * gain + pad
-    xyxy = xywh2xyxy(b).long()
-    clip_coords(xyxy, im.shape)
-    crop = im[int(xyxy[0, 1]):int(xyxy[0, 3]), int(xyxy[0, 0]):int(xyxy[0, 2])]
-    cv2.imwrite(str(increment_path(file, mkdir=True).with_suffix('.jpg')), crop if BGR else crop[..., ::-1])
-
-
 def increment_path(path, exist_ok=False, sep='', mkdir=False):
     # Increment file or directory path, i.e. runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ... etc.
     path = Path(path)  # os-agnostic
     if path.exists() and not exist_ok:
-        suffix = path.suffix
-        path = path.with_suffix('')
-        dirs = glob.glob(f"{path}{sep}*")  # similar paths
-        matches = [re.search(rf"%s{sep}(\d+)" % path.stem, d) for d in dirs]
-        i = [int(m.groups()[0]) for m in matches if m]  # indices
-        n = max(i) + 1 if i else 2  # increment number
-        path = Path(f"{path}{sep}{n}{suffix}")  # update path
-    dir = path if path.suffix == '' else path.parent  # directory
-    if not dir.exists() and mkdir:
-        dir.mkdir(parents=True, exist_ok=True)  # make directory
+        path, suffix = (path.with_suffix(''), path.suffix) if path.is_file() else (path, '')
+
+        # Method 1
+        for n in range(2, 9999):
+            p = f'{path}{sep}{n}{suffix}'  # increment path
+            if not os.path.exists(p):  #
+                break
+        path = Path(p)
+
+        # Method 2 (deprecated)
+        # dirs = glob.glob(f"{path}{sep}*")  # similar paths
+        # matches = [re.search(rf"{path.stem}{sep}(\d+)", d) for d in dirs]
+        # i = [int(m.groups()[0]) for m in matches if m]  # indices
+        # n = max(i) + 1 if i else 2  # increment number
+        # path = Path(f"{path}{sep}{n}{suffix}")  # increment path
+
+    if mkdir:
+        path.mkdir(parents=True, exist_ok=True)  # make directory
+
     return path
+
+
+# OpenCV Multilanguage-friendly functions ------------------------------------------------------------------------------------
+imshow_ = cv2.imshow  # copy to avoid recursion errors
+
+
+def imread(filename, flags=cv2.IMREAD_COLOR):
+    return cv2.imdecode(np.fromfile(filename, np.uint8), flags)
+
+
+def imwrite(filename, img):
+    try:
+        cv2.imencode(Path(filename).suffix, img)[1].tofile(filename)
+        return True
+    except Exception:
+        return False
+
+
+def imshow(path, im):
+    imshow_(path.encode('unicode_escape').decode(), im)
+
+
+cv2.imread, cv2.imwrite, cv2.imshow = imread, imwrite, imshow  # redefine
+
+# Variables ------------------------------------------------------------------------------------------------------------
diff --git a/utils/google_app_engine/additional_requirements.txt b/utils/google_app_engine/additional_requirements.txt
index 5fcc30524a59..d5b76758c876 100644
--- a/utils/google_app_engine/additional_requirements.txt
+++ b/utils/google_app_engine/additional_requirements.txt
@@ -1,4 +1,5 @@
 # add these requirements in your app on top of the existing ones
-pip==18.1
+pip==21.1
 Flask==1.0.2
-gunicorn==19.9.0
+gunicorn==19.10.0
+werkzeug>=2.2.3 # not directly required, pinned by Snyk to avoid a vulnerability
diff --git a/utils/google_app_engine/app.yaml b/utils/google_app_engine/app.yaml
index ac29d104b144..5056b7c1186d 100644
--- a/utils/google_app_engine/app.yaml
+++ b/utils/google_app_engine/app.yaml
@@ -11,4 +11,4 @@ manual_scaling:
 resources:
   cpu: 1
   memory_gb: 4
-  disk_size_gb: 20
\ No newline at end of file
+  disk_size_gb: 20
diff --git a/utils/google_utils.py b/utils/google_utils.py
deleted file mode 100644
index 16f5afba169b..000000000000
--- a/utils/google_utils.py
+++ /dev/null
@@ -1,128 +0,0 @@
-# Google utils: https://cloud.google.com/storage/docs/reference/libraries
-
-import os
-import platform
-import subprocess
-import time
-from pathlib import Path
-
-import requests
-import torch
-
-
-def gsutil_getsize(url=''):
-    # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du
-    s = subprocess.check_output(f'gsutil du {url}', shell=True).decode('utf-8')
-    return eval(s.split(' ')[0]) if len(s) else 0  # bytes
-
-
-def attempt_download(file, repo='ultralytics/yolov5'):
-    # Attempt file download if does not exist
-    file = Path(str(file).strip().replace("'", ''))
-
-    if not file.exists():
-        try:
-            response = requests.get(f'https://api.github.com/repos/{repo}/releases/latest').json()  # github api
-            assets = [x['name'] for x in response['assets']]  # release assets, i.e. ['yolov5s.pt', 'yolov5m.pt', ...]
-            tag = response['tag_name']  # i.e. 'v1.0'
-        except:  # fallback plan
-            assets = ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt',
-                      'yolov5s6.pt', 'yolov5m6.pt', 'yolov5l6.pt', 'yolov5x6.pt']
-            try:
-                tag = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1]
-            except:
-                tag = 'v5.0'  # current release
-
-        tag = 'v5.0'
-
-        name = file.name
-        if name in assets:
-            msg = f'{file} missing, try downloading from https://github.com/{repo}/releases/'
-            redundant = False  # second download option
-            try:  # GitHub
-                url = f'https://github.com/{repo}/releases/download/{tag}/{name}'
-                print(f'Downloading {url} to {file}...')
-                torch.hub.download_url_to_file(url, file)
-                assert file.exists() and file.stat().st_size > 1E6  # check
-            except Exception as e:  # GCP
-                print(f'Download error: {e}')
-                assert redundant, 'No secondary mirror'
-                url = f'https://storage.googleapis.com/{repo}/ckpt/{name}'
-                print(f'Downloading {url} to {file}...')
-                os.system(f'curl -L {url} -o {file}')  # torch.hub.download_url_to_file(url, weights)
-            finally:
-                if not file.exists() or file.stat().st_size < 1E6:  # check
-                    file.unlink(missing_ok=True)  # remove partial downloads
-                    print(f'ERROR: Download failure: {msg}')
-                print('')
-                return
-
-
-def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'):
-    # Downloads a file from Google Drive. from yolov5.utils.google_utils import *; gdrive_download()
-    t = time.time()
-    file = Path(file)
-    cookie = Path('cookie')  # gdrive cookie
-    print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ', end='')
-    file.unlink(missing_ok=True)  # remove existing file
-    cookie.unlink(missing_ok=True)  # remove existing cookie
-
-    # Attempt file download
-    out = "NUL" if platform.system() == "Windows" else "/dev/null"
-    os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}')
-    if os.path.exists('cookie'):  # large file
-        s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}'
-    else:  # small file
-        s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"'
-    r = os.system(s)  # execute, capture return
-    cookie.unlink(missing_ok=True)  # remove existing cookie
-
-    # Error check
-    if r != 0:
-        file.unlink(missing_ok=True)  # remove partial
-        print('Download error ')  # raise Exception('Download error')
-        return r
-
-    # Unzip if archive
-    if file.suffix == '.zip':
-        print('unzipping... ', end='')
-        os.system(f'unzip -q {file}')  # unzip
-        file.unlink()  # remove zip to free space
-
-    print(f'Done ({time.time() - t:.1f}s)')
-    return r
-
-
-def get_token(cookie="./cookie"):
-    with open(cookie) as f:
-        for line in f:
-            if "download" in line:
-                return line.split()[-1]
-    return ""
-
-# def upload_blob(bucket_name, source_file_name, destination_blob_name):
-#     # Uploads a file to a bucket
-#     # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
-#
-#     storage_client = storage.Client()
-#     bucket = storage_client.get_bucket(bucket_name)
-#     blob = bucket.blob(destination_blob_name)
-#
-#     blob.upload_from_filename(source_file_name)
-#
-#     print('File {} uploaded to {}.'.format(
-#         source_file_name,
-#         destination_blob_name))
-#
-#
-# def download_blob(bucket_name, source_blob_name, destination_file_name):
-#     # Uploads a blob from a bucket
-#     storage_client = storage.Client()
-#     bucket = storage_client.get_bucket(bucket_name)
-#     blob = bucket.blob(source_blob_name)
-#
-#     blob.download_to_filename(destination_file_name)
-#
-#     print('Blob {} downloaded to {}.'.format(
-#         source_blob_name,
-#         destination_file_name))
diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py
new file mode 100644
index 000000000000..19f5f1ae051c
--- /dev/null
+++ b/utils/loggers/__init__.py
@@ -0,0 +1,418 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Logging utils
+"""
+
+import os
+import warnings
+from pathlib import Path
+
+import pkg_resources as pkg
+import torch
+from sly_metrics import send_metrics
+from sly_train_utils import upload_label_vis
+from torch.utils.tensorboard import SummaryWriter
+
+from utils.general import LOGGER, colorstr, cv2
+from utils.loggers.clearml.clearml_utils import ClearmlLogger
+from utils.loggers.wandb.wandb_utils import WandbLogger
+from utils.plots import plot_images, plot_labels, plot_results
+from utils.torch_utils import de_parallel
+
+LOGGERS = ('csv', 'tb', 'wandb', 'clearml', 'comet')  # *.csv, TensorBoard, Weights & Biases, ClearML
+RANK = int(os.getenv('RANK', -1))
+
+try:
+    import wandb
+
+    assert hasattr(wandb, '__version__')  # verify package import not local dir
+    if pkg.parse_version(wandb.__version__) >= pkg.parse_version('0.12.2') and RANK in {0, -1}:
+        try:
+            wandb_login_success = wandb.login(timeout=30)
+        except wandb.errors.UsageError:  # known non-TTY terminal issue
+            wandb_login_success = False
+        if not wandb_login_success:
+            wandb = None
+except (ImportError, AssertionError):
+    wandb = None
+
+try:
+    import clearml
+
+    assert hasattr(clearml, '__version__')  # verify package import not local dir
+except (ImportError, AssertionError):
+    clearml = None
+
+try:
+    if RANK not in [0, -1]:
+        comet_ml = None
+    else:
+        import comet_ml
+
+        assert hasattr(comet_ml, '__version__')  # verify package import not local dir
+        from utils.loggers.comet import CometLogger
+
+except (ModuleNotFoundError, ImportError, AssertionError):
+    comet_ml = None
+
+
+class Loggers():
+    # YOLOv5 Loggers class
+    def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None, include=LOGGERS):
+        self.save_dir = save_dir
+        self.weights = weights
+        self.opt = opt
+        self.hyp = hyp
+        self.plots = not opt.noplots  # plot results
+        self.logger = logger  # for printing results to console
+        self.include = include
+        self.keys = [
+            'train/box_loss',
+            'train/obj_loss',
+            'train/cls_loss',  # train loss
+            'metrics/precision',
+            'metrics/recall',
+            'metrics/mAP_0.5',
+            'metrics/mAP_0.5:0.95',  # metrics
+            'val/box_loss',
+            'val/obj_loss',
+            'val/cls_loss',  # val loss
+            'x/lr0',
+            'x/lr1',
+            'x/lr2']  # params
+        self.best_keys = ['best/epoch', 'best/precision', 'best/recall', 'best/mAP_0.5', 'best/mAP_0.5:0.95']
+        for k in LOGGERS:
+            setattr(self, k, None)  # init empty logger dictionary
+        self.csv = True  # always log to csv
+
+        # Messages
+        if not clearml:
+            prefix = colorstr('ClearML: ')
+            s = f"{prefix}run 'pip install clearml' to automatically track, visualize and remotely train YOLOv5 🚀 in ClearML"
+            self.logger.info(s)
+        if not comet_ml:
+            prefix = colorstr('Comet: ')
+            s = f"{prefix}run 'pip install comet_ml' to automatically track and visualize YOLOv5 🚀 runs in Comet"
+            self.logger.info(s)
+        # TensorBoard
+        s = self.save_dir
+        if 'tb' in self.include and not self.opt.evolve:
+            prefix = colorstr('TensorBoard: ')
+            self.logger.info(f"{prefix}Start with 'tensorboard --logdir {s.parent}', view at http://localhost:6006/")
+            self.tb = SummaryWriter(str(s))
+
+        # W&B
+        if wandb and 'wandb' in self.include:
+            self.opt.hyp = self.hyp  # add hyperparameters
+            self.wandb = WandbLogger(self.opt)
+        else:
+            self.wandb = None
+
+        # ClearML
+        if clearml and 'clearml' in self.include:
+            try:
+                self.clearml = ClearmlLogger(self.opt, self.hyp)
+            except Exception:
+                self.clearml = None
+                prefix = colorstr('ClearML: ')
+                LOGGER.warning(f'{prefix}WARNING ⚠️ ClearML is installed but not configured, skipping ClearML logging.'
+                               f' See https://github.com/ultralytics/yolov5/tree/master/utils/loggers/clearml#readme')
+
+        else:
+            self.clearml = None
+
+        # Comet
+        if comet_ml and 'comet' in self.include:
+            if isinstance(self.opt.resume, str) and self.opt.resume.startswith('comet://'):
+                run_id = self.opt.resume.split('/')[-1]
+                self.comet_logger = CometLogger(self.opt, self.hyp, run_id=run_id)
+
+            else:
+                self.comet_logger = CometLogger(self.opt, self.hyp)
+
+        else:
+            self.comet_logger = None
+
+    @property
+    def remote_dataset(self):
+        # Get data_dict if custom dataset artifact link is provided
+        data_dict = None
+        if self.clearml:
+            data_dict = self.clearml.data_dict
+        if self.wandb:
+            data_dict = self.wandb.data_dict
+        if self.comet_logger:
+            data_dict = self.comet_logger.data_dict
+
+        return data_dict
+
+    def on_train_start(self):
+        if self.comet_logger:
+            self.comet_logger.on_train_start()
+
+    def on_pretrain_routine_start(self):
+        if self.comet_logger:
+            self.comet_logger.on_pretrain_routine_start()
+
+    def on_pretrain_routine_end(self, labels, names):
+        # Callback runs on pre-train routine end
+        if self.plots:
+            plot_labels(labels, names, self.save_dir)
+            paths = self.save_dir.glob('*labels*.jpg')  # training labels
+            if self.wandb:
+                self.wandb.log({'Labels': [wandb.Image(str(x), caption=x.name) for x in paths]})
+            # if self.clearml:
+            #    pass  # ClearML saves these images automatically using hooks
+            if self.comet_logger:
+                self.comet_logger.on_pretrain_routine_end(paths)
+            if self.opt.sly:
+                upload_label_vis()
+
+    def on_train_batch_end(self, model, ni, imgs, targets, paths, vals):
+        log_dict = dict(zip(self.keys[:3], vals))
+        # Callback runs on train batch end
+        # ni: number integrated batches (since train start)
+        if self.plots:
+            if ni < 3:
+                f = self.save_dir / f'train_batch{ni}.jpg'  # filename
+                plot_images(imgs, targets, paths, f)
+                if ni == 0 and self.tb and not self.opt.sync_bn:
+                    log_tensorboard_graph(self.tb, model, imgsz=(self.opt.imgsz, self.opt.imgsz))
+            if ni == 10 and (self.wandb or self.clearml):
+                files = sorted(self.save_dir.glob('train*.jpg'))
+                if self.wandb:
+                    self.wandb.log({'Mosaics': [wandb.Image(str(f), caption=f.name) for f in files if f.exists()]})
+                if self.clearml:
+                    self.clearml.log_debug_samples(files, title='Mosaics')
+
+        if self.comet_logger:
+            self.comet_logger.on_train_batch_end(log_dict, step=ni)
+
+    def on_train_epoch_end(self, epoch):
+        # Callback runs on train epoch end
+        if self.wandb:
+            self.wandb.current_epoch = epoch + 1
+
+        if self.comet_logger:
+            self.comet_logger.on_train_epoch_end(epoch)
+
+    def on_val_start(self):
+        if self.comet_logger:
+            self.comet_logger.on_val_start()
+
+    def on_val_image_end(self, pred, predn, path, names, im):
+        # Callback runs on val image end
+        if self.wandb:
+            self.wandb.val_one_image(pred, predn, path, names, im)
+        if self.clearml:
+            self.clearml.log_image_with_boxes(path, pred, names, im)
+
+    def on_val_batch_end(self, batch_i, im, targets, paths, shapes, out):
+        if self.comet_logger:
+            self.comet_logger.on_val_batch_end(batch_i, im, targets, paths, shapes, out)
+
+    def on_val_end(self, nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix):
+        # Callback runs on val end
+        if self.wandb or self.clearml:
+            files = sorted(self.save_dir.glob('val*.jpg'))
+        if self.wandb:
+            self.wandb.log({'Validation': [wandb.Image(str(f), caption=f.name) for f in files]})
+        if self.clearml:
+            self.clearml.log_debug_samples(files, title='Validation')
+
+        if self.comet_logger:
+            self.comet_logger.on_val_end(nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix)
+
+    def on_fit_epoch_end(self, vals, epoch, best_fitness, fi):
+        # Callback runs at the end of each fit (train+val) epoch
+        x = dict(zip(self.keys, vals))
+
+        if self.opt.sly:
+            metrics = {}
+
+        if self.csv:
+            file = self.save_dir / 'results.csv'
+            n = len(x) + 1  # number of cols
+            s = '' if file.exists() else (('%20s,' * n % tuple(['epoch'] + self.keys)).rstrip(',') + '\n')  # add header
+            with open(file, 'a') as f:
+                f.write(s + ('%20.5g,' * n % tuple([epoch] + vals)).rstrip(',') + '\n')
+
+        if self.tb:
+            for k, v in x.items():
+                self.tb.add_scalar(k, v, epoch)
+        elif self.clearml:  # log to ClearML if TensorBoard not used
+            for k, v in x.items():
+                title, series = k.split('/')
+                self.clearml.task.get_logger().report_scalar(title, series, v, epoch)
+        elif self.opt.sly:
+            for k, v in x.items():
+                if torch.is_tensor(v):
+                    v = float(v.cpu().numpy())
+                metrics[k] = v
+
+        if self.wandb:
+            if best_fitness == fi:
+                best_results = [epoch] + vals[3:7]
+                for i, name in enumerate(self.best_keys):
+                    self.wandb.wandb_run.summary[name] = best_results[i]  # log best results in the summary
+            self.wandb.log(x)
+            self.wandb.end_epoch()
+
+        if self.clearml:
+            self.clearml.current_epoch_logged_images = set()  # reset epoch image limit
+            self.clearml.current_epoch += 1
+
+        if self.comet_logger:
+            self.comet_logger.on_fit_epoch_end(x, epoch=epoch)
+
+        if self.opt.sly:
+            send_metrics(epoch, self.opt.epochs, metrics, self.opt.metrics_period)
+
+
+    def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
+        # Callback runs on model save event
+        if (epoch + 1) % self.opt.save_period == 0 and not final_epoch and self.opt.save_period != -1:
+            if self.wandb:
+                self.wandb.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi)
+            if self.clearml:
+                self.clearml.task.update_output_model(model_path=str(last),
+                                                      model_name='Latest Model',
+                                                      auto_delete_file=False)
+
+        if self.comet_logger:
+            self.comet_logger.on_model_save(last, epoch, final_epoch, best_fitness, fi)
+
+    def on_train_end(self, last, best, epoch, results):
+        # Callback runs on training end, i.e. saving best model
+        if self.plots:
+            plot_results(file=self.save_dir / 'results.csv')  # save results.png
+        files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))]
+        files = [(self.save_dir / f) for f in files if (self.save_dir / f).exists()]  # filter
+        self.logger.info(f"Results saved to {colorstr('bold', self.save_dir)}")
+
+        if self.tb and not self.clearml:  # These images are already captured by ClearML by now, we don't want doubles
+            for f in files:
+                self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats='HWC')
+
+        if self.wandb:
+            self.wandb.log(dict(zip(self.keys[3:10], results)))
+            self.wandb.log({'Results': [wandb.Image(str(f), caption=f.name) for f in files]})
+            # Calling wandb.log. TODO: Refactor this into WandbLogger.log_model
+            if not self.opt.evolve:
+                wandb.log_artifact(str(best if best.exists() else last),
+                                   type='model',
+                                   name=f'run_{self.wandb.wandb_run.id}_model',
+                                   aliases=['latest', 'best', 'stripped'])
+            self.wandb.finish_run()
+
+        if self.clearml and not self.opt.evolve:
+            self.clearml.task.update_output_model(model_path=str(best if best.exists() else last),
+                                                  name='Best Model',
+                                                  auto_delete_file=False)
+
+        if self.comet_logger:
+            final_results = dict(zip(self.keys[3:10], results))
+            self.comet_logger.on_train_end(files, self.save_dir, last, best, epoch, final_results)
+
+    def on_params_update(self, params: dict):
+        # Update hyperparams or configs of the experiment
+        if self.wandb:
+            self.wandb.wandb_run.config.update(params, allow_val_change=True)
+        if self.comet_logger:
+            self.comet_logger.on_params_update(params)
+
+
+class GenericLogger:
+    """
+    YOLOv5 General purpose logger for non-task specific logging
+    Usage: from utils.loggers import GenericLogger; logger = GenericLogger(...)
+    Arguments
+        opt:             Run arguments
+        console_logger:  Console logger
+        include:         loggers to include
+    """
+
+    def __init__(self, opt, console_logger, include=('tb', 'wandb')):
+        # init default loggers
+        self.save_dir = Path(opt.save_dir)
+        self.include = include
+        self.console_logger = console_logger
+        self.csv = self.save_dir / 'results.csv'  # CSV logger
+        if 'tb' in self.include:
+            prefix = colorstr('TensorBoard: ')
+            self.console_logger.info(
+                f"{prefix}Start with 'tensorboard --logdir {self.save_dir.parent}', view at http://localhost:6006/")
+            self.tb = SummaryWriter(str(self.save_dir))
+
+        if wandb and 'wandb' in self.include:
+            self.wandb = wandb.init(project=web_project_name(str(opt.project)),
+                                    name=None if opt.name == 'exp' else opt.name,
+                                    config=opt)
+        else:
+            self.wandb = None
+
+    def log_metrics(self, metrics, epoch):
+        # Log metrics dictionary to all loggers
+        if self.csv:
+            keys, vals = list(metrics.keys()), list(metrics.values())
+            n = len(metrics) + 1  # number of cols
+            s = '' if self.csv.exists() else (('%23s,' * n % tuple(['epoch'] + keys)).rstrip(',') + '\n')  # header
+            with open(self.csv, 'a') as f:
+                f.write(s + ('%23.5g,' * n % tuple([epoch] + vals)).rstrip(',') + '\n')
+
+        if self.tb:
+            for k, v in metrics.items():
+                self.tb.add_scalar(k, v, epoch)
+
+        if self.wandb:
+            self.wandb.log(metrics, step=epoch)
+
+    def log_images(self, files, name='Images', epoch=0):
+        # Log images to all loggers
+        files = [Path(f) for f in (files if isinstance(files, (tuple, list)) else [files])]  # to Path
+        files = [f for f in files if f.exists()]  # filter by exists
+
+        if self.tb:
+            for f in files:
+                self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats='HWC')
+
+        if self.wandb:
+            self.wandb.log({name: [wandb.Image(str(f), caption=f.name) for f in files]}, step=epoch)
+
+    def log_graph(self, model, imgsz=(640, 640)):
+        # Log model graph to all loggers
+        if self.tb:
+            log_tensorboard_graph(self.tb, model, imgsz)
+
+    def log_model(self, model_path, epoch=0, metadata={}):
+        # Log model to all loggers
+        if self.wandb:
+            art = wandb.Artifact(name=f'run_{wandb.run.id}_model', type='model', metadata=metadata)
+            art.add_file(str(model_path))
+            wandb.log_artifact(art)
+
+    def update_params(self, params):
+        # Update the parameters logged
+        if self.wandb:
+            wandb.run.config.update(params, allow_val_change=True)
+
+
+def log_tensorboard_graph(tb, model, imgsz=(640, 640)):
+    # Log model graph to TensorBoard
+    try:
+        p = next(model.parameters())  # for device, type
+        imgsz = (imgsz, imgsz) if isinstance(imgsz, int) else imgsz  # expand
+        im = torch.zeros((1, 3, *imgsz)).to(p.device).type_as(p)  # input image (WARNING: must be zeros, not empty)
+        with warnings.catch_warnings():
+            warnings.simplefilter('ignore')  # suppress jit trace warning
+            tb.add_graph(torch.jit.trace(de_parallel(model), im, strict=False), [])
+    except Exception as e:
+        LOGGER.warning(f'WARNING ⚠️ TensorBoard graph visualization failure {e}')
+
+
+def web_project_name(project):
+    # Convert local project name to web project name
+    if not project.startswith('runs/train'):
+        return project
+    suffix = '-Classify' if project.endswith('-cls') else '-Segment' if project.endswith('-seg') else ''
+    return f'YOLOv5{suffix}'
diff --git a/utils/loggers/clearml/README.md b/utils/loggers/clearml/README.md
new file mode 100644
index 000000000000..ca41c040193c
--- /dev/null
+++ b/utils/loggers/clearml/README.md
@@ -0,0 +1,237 @@
+# ClearML Integration
+
+<img align="center" src="https://github.com/thepycoder/clearml_screenshots/raw/main/logos_dark.png#gh-light-mode-only" alt="Clear|ML"><img align="center" src="https://github.com/thepycoder/clearml_screenshots/raw/main/logos_light.png#gh-dark-mode-only" alt="Clear|ML">
+
+## About ClearML
+
+[ClearML](https://cutt.ly/yolov5-tutorial-clearml) is an [open-source](https://github.com/allegroai/clearml) toolbox designed to save you time ⏱️.
+
+🔨 Track every YOLOv5 training run in the <b>experiment manager</b>
+
+🔧 Version and easily access your custom training data with the integrated ClearML <b>Data Versioning Tool</b>
+
+🔦 <b>Remotely train and monitor</b> your YOLOv5 training runs using ClearML Agent
+
+🔬 Get the very best mAP using ClearML <b>Hyperparameter Optimization</b>
+
+🔭 Turn your newly trained <b>YOLOv5 model into an API</b> with just a few commands using ClearML Serving
+
+<br />
+And so much more. It's up to you how many of these tools you want to use, you can stick to the experiment manager, or chain them all together into an impressive pipeline!
+<br />
+<br />
+
+![ClearML scalars dashboard](https://github.com/thepycoder/clearml_screenshots/raw/main/experiment_manager_with_compare.gif)
+
+<br />
+<br />
+
+## 🦾 Setting Things Up
+
+To keep track of your experiments and/or data, ClearML needs to communicate to a server. You have 2 options to get one:
+
+Either sign up for free to the [ClearML Hosted Service](https://cutt.ly/yolov5-tutorial-clearml) or you can set up your own server, see [here](https://clear.ml/docs/latest/docs/deploying_clearml/clearml_server). Even the server is open-source, so even if you're dealing with sensitive data, you should be good to go!
+
+1. Install the `clearml` python package:
+
+   ```bash
+   pip install clearml
+   ```
+
+1. Connect the ClearML SDK to the server by [creating credentials](https://app.clear.ml/settings/workspace-configuration) (go right top to Settings -> Workspace -> Create new credentials), then execute the command below and follow the instructions:
+
+   ```bash
+   clearml-init
+   ```
+
+That's it! You're done 😎
+
+<br />
+
+## 🚀 Training YOLOv5 With ClearML
+
+To enable ClearML experiment tracking, simply install the ClearML pip package.
+
+```bash
+pip install clearml>=1.2.0
+```
+
+This will enable integration with the YOLOv5 training script. Every training run from now on, will be captured and stored by the ClearML experiment manager.
+
+If you want to change the `project_name` or `task_name`, use the `--project` and `--name` arguments of the `train.py` script, by default the project will be called `YOLOv5` and the task `Training`.
+PLEASE NOTE: ClearML uses `/` as a delimiter for subprojects, so be careful when using `/` in your project name!
+
+```bash
+python train.py --img 640 --batch 16 --epochs 3 --data coco128.yaml --weights yolov5s.pt --cache
+```
+
+or with custom project and task name:
+
+```bash
+python train.py --project my_project --name my_training --img 640 --batch 16 --epochs 3 --data coco128.yaml --weights yolov5s.pt --cache
+```
+
+This will capture:
+
+- Source code + uncommitted changes
+- Installed packages
+- (Hyper)parameters
+- Model files (use `--save-period n` to save a checkpoint every n epochs)
+- Console output
+- Scalars (mAP_0.5, mAP_0.5:0.95, precision, recall, losses, learning rates, ...)
+- General info such as machine details, runtime, creation date etc.
+- All produced plots such as label correlogram and confusion matrix
+- Images with bounding boxes per epoch
+- Mosaic per epoch
+- Validation images per epoch
+- ...
+
+That's a lot right? 🤯
+Now, we can visualize all of this information in the ClearML UI to get an overview of our training progress. Add custom columns to the table view (such as e.g. mAP_0.5) so you can easily sort on the best performing model. Or select multiple experiments and directly compare them!
+
+There even more we can do with all of this information, like hyperparameter optimization and remote execution, so keep reading if you want to see how that works!
+
+<br />
+
+## 🔗 Dataset Version Management
+
+Versioning your data separately from your code is generally a good idea and makes it easy to acquire the latest version too. This repository supports supplying a dataset version ID, and it will make sure to get the data if it's not there yet. Next to that, this workflow also saves the used dataset ID as part of the task parameters, so you will always know for sure which data was used in which experiment!
+
+![ClearML Dataset Interface](https://github.com/thepycoder/clearml_screenshots/raw/main/clearml_data.gif)
+
+### Prepare Your Dataset
+
+The YOLOv5 repository supports a number of different datasets by using yaml files containing their information. By default datasets are downloaded to the `../datasets` folder in relation to the repository root folder. So if you downloaded the `coco128` dataset using the link in the yaml or with the scripts provided by yolov5, you get this folder structure:
+
+```
+..
+|_ yolov5
+|_ datasets
+    |_ coco128
+        |_ images
+        |_ labels
+        |_ LICENSE
+        |_ README.txt
+```
+
+But this can be any dataset you wish. Feel free to use your own, as long as you keep to this folder structure.
+
+Next, ⚠️**copy the corresponding yaml file to the root of the dataset folder**⚠️. This yaml files contains the information ClearML will need to properly use the dataset. You can make this yourself too, of course, just follow the structure of the example yamls.
+
+Basically we need the following keys: `path`, `train`, `test`, `val`, `nc`, `names`.
+
+```
+..
+|_ yolov5
+|_ datasets
+    |_ coco128
+        |_ images
+        |_ labels
+        |_ coco128.yaml  # <---- HERE!
+        |_ LICENSE
+        |_ README.txt
+```
+
+### Upload Your Dataset
+
+To get this dataset into ClearML as a versioned dataset, go to the dataset root folder and run the following command:
+
+```bash
+cd coco128
+clearml-data sync --project YOLOv5 --name coco128 --folder .
+```
+
+The command `clearml-data sync` is actually a shorthand command. You could also run these commands one after the other:
+
+```bash
+# Optionally add --parent <parent_dataset_id> if you want to base
+# this version on another dataset version, so no duplicate files are uploaded!
+clearml-data create --name coco128 --project YOLOv5
+clearml-data add --files .
+clearml-data close
+```
+
+### Run Training Using A ClearML Dataset
+
+Now that you have a ClearML dataset, you can very simply use it to train custom YOLOv5 🚀 models!
+
+```bash
+python train.py --img 640 --batch 16 --epochs 3 --data clearml://<your_dataset_id> --weights yolov5s.pt --cache
+```
+
+<br />
+
+## 👀 Hyperparameter Optimization
+
+Now that we have our experiments and data versioned, it's time to take a look at what we can build on top!
+
+Using the code information, installed packages and environment details, the experiment itself is now **completely reproducible**. In fact, ClearML allows you to clone an experiment and even change its parameters. We can then just rerun it with these new parameters automatically, this is basically what HPO does!
+
+To **run hyperparameter optimization locally**, we've included a pre-made script for you. Just make sure a training task has been run at least once, so it is in the ClearML experiment manager, we will essentially clone it and change its hyperparameters.
+
+You'll need to fill in the ID of this `template task` in the script found at `utils/loggers/clearml/hpo.py` and then just run it :) You can change `task.execute_locally()` to `task.execute()` to put it in a ClearML queue and have a remote agent work on it instead.
+
+```bash
+# To use optuna, install it first, otherwise you can change the optimizer to just be RandomSearch
+pip install optuna
+python utils/loggers/clearml/hpo.py
+```
+
+![HPO](https://github.com/thepycoder/clearml_screenshots/raw/main/hpo.png)
+
+## 🤯 Remote Execution (advanced)
+
+Running HPO locally is really handy, but what if we want to run our experiments on a remote machine instead? Maybe you have access to a very powerful GPU machine on-site, or you have some budget to use cloud GPUs.
+This is where the ClearML Agent comes into play. Check out what the agent can do here:
+
+- [YouTube video](https://youtu.be/MX3BrXnaULs)
+- [Documentation](https://clear.ml/docs/latest/docs/clearml_agent)
+
+In short: every experiment tracked by the experiment manager contains enough information to reproduce it on a different machine (installed packages, uncommitted changes etc.). So a ClearML agent does just that: it listens to a queue for incoming tasks and when it finds one, it recreates the environment and runs it while still reporting scalars, plots etc. to the experiment manager.
+
+You can turn any machine (a cloud VM, a local GPU machine, your own laptop ... ) into a ClearML agent by simply running:
+
+```bash
+clearml-agent daemon --queue <queues_to_listen_to> [--docker]
+```
+
+### Cloning, Editing And Enqueuing
+
+With our agent running, we can give it some work. Remember from the HPO section that we can clone a task and edit the hyperparameters? We can do that from the interface too!
+
+🪄 Clone the experiment by right-clicking it
+
+🎯 Edit the hyperparameters to what you wish them to be
+
+⏳ Enqueue the task to any of the queues by right-clicking it
+
+![Enqueue a task from the UI](https://github.com/thepycoder/clearml_screenshots/raw/main/enqueue.gif)
+
+### Executing A Task Remotely
+
+Now you can clone a task like we explained above, or simply mark your current script by adding `task.execute_remotely()` and on execution it will be put into a queue, for the agent to start working on!
+
+To run the YOLOv5 training script remotely, all you have to do is add this line to the training.py script after the clearml logger has been instantiated:
+
+```python
+# ...
+# Loggers
+data_dict = None
+if RANK in {-1, 0}:
+    loggers = Loggers(save_dir, weights, opt, hyp, LOGGER)  # loggers instance
+    if loggers.clearml:
+        loggers.clearml.task.execute_remotely(queue="my_queue")  # <------ ADD THIS LINE
+        # Data_dict is either None is user did not choose for ClearML dataset or is filled in by ClearML
+        data_dict = loggers.clearml.data_dict
+# ...
+```
+
+When running the training script after this change, python will run the script up until that line, after which it will package the code and send it to the queue instead!
+
+### Autoscaling workers
+
+ClearML comes with autoscalers too! This tool will automatically spin up new remote machines in the cloud of your choice (AWS, GCP, Azure) and turn them into ClearML agents for you whenever there are experiments detected in the queue. Once the tasks are processed, the autoscaler will automatically shut down the remote machines, and you stop paying!
+
+Check out the autoscalers getting started video below.
+
+[![Watch the video](https://img.youtube.com/vi/j4XVMAaUt3E/0.jpg)](https://youtu.be/j4XVMAaUt3E)
diff --git a/utils/wandb_logging/__init__.py b/utils/loggers/clearml/__init__.py
similarity index 100%
rename from utils/wandb_logging/__init__.py
rename to utils/loggers/clearml/__init__.py
diff --git a/utils/loggers/clearml/clearml_utils.py b/utils/loggers/clearml/clearml_utils.py
new file mode 100644
index 000000000000..2764abe90da8
--- /dev/null
+++ b/utils/loggers/clearml/clearml_utils.py
@@ -0,0 +1,164 @@
+"""Main Logger class for ClearML experiment tracking."""
+import glob
+import re
+from pathlib import Path
+
+import numpy as np
+import yaml
+
+from utils.plots import Annotator, colors
+
+try:
+    import clearml
+    from clearml import Dataset, Task
+
+    assert hasattr(clearml, '__version__')  # verify package import not local dir
+except (ImportError, AssertionError):
+    clearml = None
+
+
+def construct_dataset(clearml_info_string):
+    """Load in a clearml dataset and fill the internal data_dict with its contents.
+    """
+    dataset_id = clearml_info_string.replace('clearml://', '')
+    dataset = Dataset.get(dataset_id=dataset_id)
+    dataset_root_path = Path(dataset.get_local_copy())
+
+    # We'll search for the yaml file definition in the dataset
+    yaml_filenames = list(glob.glob(str(dataset_root_path / '*.yaml')) + glob.glob(str(dataset_root_path / '*.yml')))
+    if len(yaml_filenames) > 1:
+        raise ValueError('More than one yaml file was found in the dataset root, cannot determine which one contains '
+                         'the dataset definition this way.')
+    elif len(yaml_filenames) == 0:
+        raise ValueError('No yaml definition found in dataset root path, check that there is a correct yaml file '
+                         'inside the dataset root path.')
+    with open(yaml_filenames[0]) as f:
+        dataset_definition = yaml.safe_load(f)
+
+    assert set(dataset_definition.keys()).issuperset(
+        {'train', 'test', 'val', 'nc', 'names'}
+    ), "The right keys were not found in the yaml file, make sure it at least has the following keys: ('train', 'test', 'val', 'nc', 'names')"
+
+    data_dict = dict()
+    data_dict['train'] = str(
+        (dataset_root_path / dataset_definition['train']).resolve()) if dataset_definition['train'] else None
+    data_dict['test'] = str(
+        (dataset_root_path / dataset_definition['test']).resolve()) if dataset_definition['test'] else None
+    data_dict['val'] = str(
+        (dataset_root_path / dataset_definition['val']).resolve()) if dataset_definition['val'] else None
+    data_dict['nc'] = dataset_definition['nc']
+    data_dict['names'] = dataset_definition['names']
+
+    return data_dict
+
+
+class ClearmlLogger:
+    """Log training runs, datasets, models, and predictions to ClearML.
+
+    This logger sends information to ClearML at app.clear.ml or to your own hosted server. By default,
+    this information includes hyperparameters, system configuration and metrics, model metrics, code information and
+    basic data metrics and analyses.
+
+    By providing additional command line arguments to train.py, datasets,
+    models and predictions can also be logged.
+    """
+
+    def __init__(self, opt, hyp):
+        """
+        - Initialize ClearML Task, this object will capture the experiment
+        - Upload dataset version to ClearML Data if opt.upload_dataset is True
+
+        arguments:
+        opt (namespace) -- Commandline arguments for this run
+        hyp (dict) -- Hyperparameters for this run
+
+        """
+        self.current_epoch = 0
+        # Keep tracked of amount of logged images to enforce a limit
+        self.current_epoch_logged_images = set()
+        # Maximum number of images to log to clearML per epoch
+        self.max_imgs_to_log_per_epoch = 16
+        # Get the interval of epochs when bounding box images should be logged
+        self.bbox_interval = opt.bbox_interval
+        self.clearml = clearml
+        self.task = None
+        self.data_dict = None
+        if self.clearml:
+            self.task = Task.init(
+                project_name=opt.project if opt.project != 'runs/train' else 'YOLOv5',
+                task_name=opt.name if opt.name != 'exp' else 'Training',
+                tags=['YOLOv5'],
+                output_uri=True,
+                reuse_last_task_id=opt.exist_ok,
+                auto_connect_frameworks={'pytorch': False}
+                # We disconnect pytorch auto-detection, because we added manual model save points in the code
+            )
+            # ClearML's hooks will already grab all general parameters
+            # Only the hyperparameters coming from the yaml config file
+            # will have to be added manually!
+            self.task.connect(hyp, name='Hyperparameters')
+            self.task.connect(opt, name='Args')
+
+            # Make sure the code is easily remotely runnable by setting the docker image to use by the remote agent
+            self.task.set_base_docker('ultralytics/yolov5:latest',
+                                      docker_arguments='--ipc=host -e="CLEARML_AGENT_SKIP_PYTHON_ENV_INSTALL=1"',
+                                      docker_setup_bash_script='pip install clearml')
+
+            # Get ClearML Dataset Version if requested
+            if opt.data.startswith('clearml://'):
+                # data_dict should have the following keys:
+                # names, nc (number of classes), test, train, val (all three relative paths to ../datasets)
+                self.data_dict = construct_dataset(opt.data)
+                # Set data to data_dict because wandb will crash without this information and opt is the best way
+                # to give it to them
+                opt.data = self.data_dict
+
+    def log_debug_samples(self, files, title='Debug Samples'):
+        """
+        Log files (images) as debug samples in the ClearML task.
+
+        arguments:
+        files (List(PosixPath)) a list of file paths in PosixPath format
+        title (str) A title that groups together images with the same values
+        """
+        for f in files:
+            if f.exists():
+                it = re.search(r'_batch(\d+)', f.name)
+                iteration = int(it.groups()[0]) if it else 0
+                self.task.get_logger().report_image(title=title,
+                                                    series=f.name.replace(it.group(), ''),
+                                                    local_path=str(f),
+                                                    iteration=iteration)
+
+    def log_image_with_boxes(self, image_path, boxes, class_names, image, conf_threshold=0.25):
+        """
+        Draw the bounding boxes on a single image and report the result as a ClearML debug sample.
+
+        arguments:
+        image_path (PosixPath) the path the original image file
+        boxes (list): list of scaled predictions in the format - [xmin, ymin, xmax, ymax, confidence, class]
+        class_names (dict): dict containing mapping of class int to class name
+        image (Tensor): A torch tensor containing the actual image data
+        """
+        if len(self.current_epoch_logged_images) < self.max_imgs_to_log_per_epoch and self.current_epoch >= 0:
+            # Log every bbox_interval times and deduplicate for any intermittend extra eval runs
+            if self.current_epoch % self.bbox_interval == 0 and image_path not in self.current_epoch_logged_images:
+                im = np.ascontiguousarray(np.moveaxis(image.mul(255).clamp(0, 255).byte().cpu().numpy(), 0, 2))
+                annotator = Annotator(im=im, pil=True)
+                for i, (conf, class_nr, box) in enumerate(zip(boxes[:, 4], boxes[:, 5], boxes[:, :4])):
+                    color = colors(i)
+
+                    class_name = class_names[int(class_nr)]
+                    confidence_percentage = round(float(conf) * 100, 2)
+                    label = f'{class_name}: {confidence_percentage}%'
+
+                    if conf > conf_threshold:
+                        annotator.rectangle(box.cpu().numpy(), outline=color)
+                        annotator.box_label(box.cpu().numpy(), label=label, color=color)
+
+                annotated_image = annotator.result()
+                self.task.get_logger().report_image(title='Bounding Boxes',
+                                                    series=image_path.name,
+                                                    iteration=self.current_epoch,
+                                                    image=annotated_image)
+                self.current_epoch_logged_images.add(image_path)
diff --git a/utils/loggers/clearml/hpo.py b/utils/loggers/clearml/hpo.py
new file mode 100644
index 000000000000..ee518b0fbfc8
--- /dev/null
+++ b/utils/loggers/clearml/hpo.py
@@ -0,0 +1,84 @@
+from clearml import Task
+# Connecting ClearML with the current process,
+# from here on everything is logged automatically
+from clearml.automation import HyperParameterOptimizer, UniformParameterRange
+from clearml.automation.optuna import OptimizerOptuna
+
+task = Task.init(project_name='Hyper-Parameter Optimization',
+                 task_name='YOLOv5',
+                 task_type=Task.TaskTypes.optimizer,
+                 reuse_last_task_id=False)
+
+# Example use case:
+optimizer = HyperParameterOptimizer(
+    # This is the experiment we want to optimize
+    base_task_id='<your_template_task_id>',
+    # here we define the hyper-parameters to optimize
+    # Notice: The parameter name should exactly match what you see in the UI: <section_name>/<parameter>
+    # For Example, here we see in the base experiment a section Named: "General"
+    # under it a parameter named "batch_size", this becomes "General/batch_size"
+    # If you have `argparse` for example, then arguments will appear under the "Args" section,
+    # and you should instead pass "Args/batch_size"
+    hyper_parameters=[
+        UniformParameterRange('Hyperparameters/lr0', min_value=1e-5, max_value=1e-1),
+        UniformParameterRange('Hyperparameters/lrf', min_value=0.01, max_value=1.0),
+        UniformParameterRange('Hyperparameters/momentum', min_value=0.6, max_value=0.98),
+        UniformParameterRange('Hyperparameters/weight_decay', min_value=0.0, max_value=0.001),
+        UniformParameterRange('Hyperparameters/warmup_epochs', min_value=0.0, max_value=5.0),
+        UniformParameterRange('Hyperparameters/warmup_momentum', min_value=0.0, max_value=0.95),
+        UniformParameterRange('Hyperparameters/warmup_bias_lr', min_value=0.0, max_value=0.2),
+        UniformParameterRange('Hyperparameters/box', min_value=0.02, max_value=0.2),
+        UniformParameterRange('Hyperparameters/cls', min_value=0.2, max_value=4.0),
+        UniformParameterRange('Hyperparameters/cls_pw', min_value=0.5, max_value=2.0),
+        UniformParameterRange('Hyperparameters/obj', min_value=0.2, max_value=4.0),
+        UniformParameterRange('Hyperparameters/obj_pw', min_value=0.5, max_value=2.0),
+        UniformParameterRange('Hyperparameters/iou_t', min_value=0.1, max_value=0.7),
+        UniformParameterRange('Hyperparameters/anchor_t', min_value=2.0, max_value=8.0),
+        UniformParameterRange('Hyperparameters/fl_gamma', min_value=0.0, max_value=4.0),
+        UniformParameterRange('Hyperparameters/hsv_h', min_value=0.0, max_value=0.1),
+        UniformParameterRange('Hyperparameters/hsv_s', min_value=0.0, max_value=0.9),
+        UniformParameterRange('Hyperparameters/hsv_v', min_value=0.0, max_value=0.9),
+        UniformParameterRange('Hyperparameters/degrees', min_value=0.0, max_value=45.0),
+        UniformParameterRange('Hyperparameters/translate', min_value=0.0, max_value=0.9),
+        UniformParameterRange('Hyperparameters/scale', min_value=0.0, max_value=0.9),
+        UniformParameterRange('Hyperparameters/shear', min_value=0.0, max_value=10.0),
+        UniformParameterRange('Hyperparameters/perspective', min_value=0.0, max_value=0.001),
+        UniformParameterRange('Hyperparameters/flipud', min_value=0.0, max_value=1.0),
+        UniformParameterRange('Hyperparameters/fliplr', min_value=0.0, max_value=1.0),
+        UniformParameterRange('Hyperparameters/mosaic', min_value=0.0, max_value=1.0),
+        UniformParameterRange('Hyperparameters/mixup', min_value=0.0, max_value=1.0),
+        UniformParameterRange('Hyperparameters/copy_paste', min_value=0.0, max_value=1.0)],
+    # this is the objective metric we want to maximize/minimize
+    objective_metric_title='metrics',
+    objective_metric_series='mAP_0.5',
+    # now we decide if we want to maximize it or minimize it (accuracy we maximize)
+    objective_metric_sign='max',
+    # let us limit the number of concurrent experiments,
+    # this in turn will make sure we do dont bombard the scheduler with experiments.
+    # if we have an auto-scaler connected, this, by proxy, will limit the number of machine
+    max_number_of_concurrent_tasks=1,
+    # this is the optimizer class (actually doing the optimization)
+    # Currently, we can choose from GridSearch, RandomSearch or OptimizerBOHB (Bayesian optimization Hyper-Band)
+    optimizer_class=OptimizerOptuna,
+    # If specified only the top K performing Tasks will be kept, the others will be automatically archived
+    save_top_k_tasks_only=5,  # 5,
+    compute_time_limit=None,
+    total_max_jobs=20,
+    min_iteration_per_job=None,
+    max_iteration_per_job=None,
+)
+
+# report every 10 seconds, this is way too often, but we are testing here
+optimizer.set_report_period(10 / 60)
+# You can also use the line below instead to run all the optimizer tasks locally, without using queues or agent
+# an_optimizer.start_locally(job_complete_callback=job_complete_callback)
+# set the time limit for the optimization process (2 hours)
+optimizer.set_time_limit(in_minutes=120.0)
+# Start the optimization process in the local environment
+optimizer.start_locally()
+# wait until process is done (notice we are controlling the optimization process in the background)
+optimizer.wait()
+# make sure background optimization stopped
+optimizer.stop()
+
+print('We are done, good bye')
diff --git a/utils/loggers/comet/README.md b/utils/loggers/comet/README.md
new file mode 100644
index 000000000000..47e6a45654b8
--- /dev/null
+++ b/utils/loggers/comet/README.md
@@ -0,0 +1,258 @@
+<img src="https://cdn.comet.ml/img/notebook_logo.png">
+
+# YOLOv5 with Comet
+
+This guide will cover how to use YOLOv5 with [Comet](https://bit.ly/yolov5-readme-comet2)
+
+# About Comet
+
+Comet builds tools that help data scientists, engineers, and team leaders accelerate and optimize machine learning and deep learning models.
+
+Track and visualize model metrics in real time, save your hyperparameters, datasets, and model checkpoints, and visualize your model predictions with [Comet Custom Panels](https://www.comet.com/docs/v2/guides/comet-dashboard/code-panels/about-panels/?utm_source=yolov5&utm_medium=partner&utm_campaign=partner_yolov5_2022&utm_content=github)!
+Comet makes sure you never lose track of your work and makes it easy to share results and collaborate across teams of all sizes!
+
+# Getting Started
+
+## Install Comet
+
+```shell
+pip install comet_ml
+```
+
+## Configure Comet Credentials
+
+There are two ways to configure Comet with YOLOv5.
+
+You can either set your credentials through environment variables
+
+**Environment Variables**
+
+```shell
+export COMET_API_KEY=<Your Comet API Key>
+export COMET_PROJECT_NAME=<Your Comet Project Name> # This will default to 'yolov5'
+```
+
+Or create a `.comet.config` file in your working directory and set your credentials there.
+
+**Comet Configuration File**
+
+```
+[comet]
+api_key=<Your Comet API Key>
+project_name=<Your Comet Project Name> # This will default to 'yolov5'
+```
+
+## Run the Training Script
+
+```shell
+# Train YOLOv5s on COCO128 for 5 epochs
+python train.py --img 640 --batch 16 --epochs 5 --data coco128.yaml --weights yolov5s.pt
+```
+
+That's it! Comet will automatically log your hyperparameters, command line arguments, training and validation metrics. You can visualize and analyze your runs in the Comet UI
+
+<img width="1920" alt="yolo-ui" src="https://user-images.githubusercontent.com/26833433/202851203-164e94e1-2238-46dd-91f8-de020e9d6b41.png">
+
+# Try out an Example!
+
+Check out an example of a [completed run here](https://www.comet.com/examples/comet-example-yolov5/a0e29e0e9b984e4a822db2a62d0cb357?experiment-tab=chart&showOutliers=true&smoothing=0&transformY=smoothing&xAxis=step&utm_source=yolov5&utm_medium=partner&utm_campaign=partner_yolov5_2022&utm_content=github)
+
+Or better yet, try it out yourself in this Colab Notebook
+
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1RG0WOQyxlDlo5Km8GogJpIEJlg_5lyYO?usp=sharing)
+
+# Log automatically
+
+By default, Comet will log the following items
+
+## Metrics
+
+- Box Loss, Object Loss, Classification Loss for the training and validation data
+- mAP_0.5, mAP_0.5:0.95 metrics for the validation data.
+- Precision and Recall for the validation data
+
+## Parameters
+
+- Model Hyperparameters
+- All parameters passed through the command line options
+
+## Visualizations
+
+- Confusion Matrix of the model predictions on the validation data
+- Plots for the PR and F1 curves across all classes
+- Correlogram of the Class Labels
+
+# Configure Comet Logging
+
+Comet can be configured to log additional data either through command line flags passed to the training script
+or through environment variables.
+
+```shell
+export COMET_MODE=online # Set whether to run Comet in 'online' or 'offline' mode. Defaults to online
+export COMET_MODEL_NAME=<your model name> #Set the name for the saved model. Defaults to yolov5
+export COMET_LOG_CONFUSION_MATRIX=false # Set to disable logging a Comet Confusion Matrix. Defaults to true
+export COMET_MAX_IMAGE_UPLOADS=<number of allowed images to upload to Comet> # Controls how many total image predictions to log to Comet. Defaults to 100.
+export COMET_LOG_PER_CLASS_METRICS=true # Set to log evaluation metrics for each detected class at the end of training. Defaults to false
+export COMET_DEFAULT_CHECKPOINT_FILENAME=<your checkpoint filename> # Set this if you would like to resume training from a different checkpoint. Defaults to 'last.pt'
+export COMET_LOG_BATCH_LEVEL_METRICS=true # Set this if you would like to log training metrics at the batch level. Defaults to false.
+export COMET_LOG_PREDICTIONS=true # Set this to false to disable logging model predictions
+```
+
+## Logging Checkpoints with Comet
+
+Logging Models to Comet is disabled by default. To enable it, pass the `save-period` argument to the training script. This will save the
+logged checkpoints to Comet based on the interval value provided by `save-period`
+
+```shell
+python train.py \
+--img 640 \
+--batch 16 \
+--epochs 5 \
+--data coco128.yaml \
+--weights yolov5s.pt \
+--save-period 1
+```
+
+## Logging Model Predictions
+
+By default, model predictions (images, ground truth labels and bounding boxes) will be logged to Comet.
+
+You can control the frequency of logged predictions and the associated images by passing the `bbox_interval` command line argument. Predictions can be visualized using Comet's Object Detection Custom Panel. This frequency corresponds to every Nth batch of data per epoch. In the example below, we are logging every 2nd batch of data for each epoch.
+
+**Note:** The YOLOv5 validation dataloader will default to a batch size of 32, so you will have to set the logging frequency accordingly.
+
+Here is an [example project using the Panel](https://www.comet.com/examples/comet-example-yolov5?shareable=YcwMiJaZSXfcEXpGOHDD12vA1&utm_source=yolov5&utm_medium=partner&utm_campaign=partner_yolov5_2022&utm_content=github)
+
+```shell
+python train.py \
+--img 640 \
+--batch 16 \
+--epochs 5 \
+--data coco128.yaml \
+--weights yolov5s.pt \
+--bbox_interval 2
+```
+
+### Controlling the number of Prediction Images logged to Comet
+
+When logging predictions from YOLOv5, Comet will log the images associated with each set of predictions. By default a maximum of 100 validation images are logged. You can increase or decrease this number using the `COMET_MAX_IMAGE_UPLOADS` environment variable.
+
+```shell
+env COMET_MAX_IMAGE_UPLOADS=200 python train.py \
+--img 640 \
+--batch 16 \
+--epochs 5 \
+--data coco128.yaml \
+--weights yolov5s.pt \
+--bbox_interval 1
+```
+
+### Logging Class Level Metrics
+
+Use the `COMET_LOG_PER_CLASS_METRICS` environment variable to log mAP, precision, recall, f1 for each class.
+
+```shell
+env COMET_LOG_PER_CLASS_METRICS=true python train.py \
+--img 640 \
+--batch 16 \
+--epochs 5 \
+--data coco128.yaml \
+--weights yolov5s.pt
+```
+
+## Uploading a Dataset to Comet Artifacts
+
+If you would like to store your data using [Comet Artifacts](https://www.comet.com/docs/v2/guides/data-management/using-artifacts/#learn-more?utm_source=yolov5&utm_medium=partner&utm_campaign=partner_yolov5_2022&utm_content=github), you can do so using the `upload_dataset` flag.
+
+The dataset be organized in the way described in the [YOLOv5 documentation](https://docs.ultralytics.com/tutorials/train-custom-datasets/#3-organize-directories). The dataset config `yaml` file must follow the same format as that of the `coco128.yaml` file.
+
+```shell
+python train.py \
+--img 640 \
+--batch 16 \
+--epochs 5 \
+--data coco128.yaml \
+--weights yolov5s.pt \
+--upload_dataset
+```
+
+You can find the uploaded dataset in the Artifacts tab in your Comet Workspace
+<img width="1073" alt="artifact-1" src="https://user-images.githubusercontent.com/7529846/186929193-162718bf-ec7b-4eb9-8c3b-86b3763ef8ea.png">
+
+You can preview the data directly in the Comet UI.
+<img width="1082" alt="artifact-2" src="https://user-images.githubusercontent.com/7529846/186929215-432c36a9-c109-4eb0-944b-84c2786590d6.png">
+
+Artifacts are versioned and also support adding metadata about the dataset. Comet will automatically log the metadata from your dataset `yaml` file
+<img width="963" alt="artifact-3" src="https://user-images.githubusercontent.com/7529846/186929256-9d44d6eb-1a19-42de-889a-bcbca3018f2e.png">
+
+### Using a saved Artifact
+
+If you would like to use a dataset from Comet Artifacts, set the `path` variable in your dataset `yaml` file to point to the following Artifact resource URL.
+
+```
+# contents of artifact.yaml file
+path: "comet://<workspace name>/<artifact name>:<artifact version or alias>"
+```
+
+Then pass this file to your training script in the following way
+
+```shell
+python train.py \
+--img 640 \
+--batch 16 \
+--epochs 5 \
+--data artifact.yaml \
+--weights yolov5s.pt
+```
+
+Artifacts also allow you to track the lineage of data as it flows through your Experimentation workflow. Here you can see a graph that shows you all the experiments that have used your uploaded dataset.
+<img width="1391" alt="artifact-4" src="https://user-images.githubusercontent.com/7529846/186929264-4c4014fa-fe51-4f3c-a5c5-f6d24649b1b4.png">
+
+## Resuming a Training Run
+
+If your training run is interrupted for any reason, e.g. disrupted internet connection, you can resume the run using the `resume` flag and the Comet Run Path.
+
+The Run Path has the following format `comet://<your workspace name>/<your project name>/<experiment id>`.
+
+This will restore the run to its state before the interruption, which includes restoring the  model from a checkpoint, restoring all hyperparameters and training arguments and downloading Comet dataset Artifacts if they were used in the original run. The resumed run will continue logging to the existing Experiment in the Comet UI
+
+```shell
+python train.py \
+--resume "comet://<your run path>"
+```
+
+## Hyperparameter Search with the Comet Optimizer
+
+YOLOv5 is also integrated with Comet's Optimizer, making is simple to visualize hyperparameter sweeps in the Comet UI.
+
+### Configuring an Optimizer Sweep
+
+To configure the Comet Optimizer, you will have to create a JSON file with the information about the sweep. An example file has been provided in `utils/loggers/comet/optimizer_config.json`
+
+```shell
+python utils/loggers/comet/hpo.py \
+  --comet_optimizer_config "utils/loggers/comet/optimizer_config.json"
+```
+
+The `hpo.py` script accepts the same arguments as `train.py`. If you wish to pass additional arguments to your sweep simply add them after
+the script.
+
+```shell
+python utils/loggers/comet/hpo.py \
+  --comet_optimizer_config "utils/loggers/comet/optimizer_config.json" \
+  --save-period 1 \
+  --bbox_interval 1
+```
+
+### Running a Sweep in Parallel
+
+```shell
+comet optimizer -j <set number of workers> utils/loggers/comet/hpo.py \
+  utils/loggers/comet/optimizer_config.json"
+```
+
+### Visualizing Results
+
+Comet provides a number of ways to visualize the results of your sweep. Take a look at a [project with a completed sweep here](https://www.comet.com/examples/comet-example-yolov5/view/PrlArHGuuhDTKC1UuBmTtOSXD/panels?utm_source=yolov5&utm_medium=partner&utm_campaign=partner_yolov5_2022&utm_content=github)
+
+<img width="1626" alt="hyperparameter-yolo" src="https://user-images.githubusercontent.com/7529846/186914869-7dc1de14-583f-4323-967b-c9a66a29e495.png">
diff --git a/utils/loggers/comet/__init__.py b/utils/loggers/comet/__init__.py
new file mode 100644
index 000000000000..d4599841c9fc
--- /dev/null
+++ b/utils/loggers/comet/__init__.py
@@ -0,0 +1,508 @@
+import glob
+import json
+import logging
+import os
+import sys
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+FILE = Path(__file__).resolve()
+ROOT = FILE.parents[3]  # YOLOv5 root directory
+if str(ROOT) not in sys.path:
+    sys.path.append(str(ROOT))  # add ROOT to PATH
+
+try:
+    import comet_ml
+
+    # Project Configuration
+    config = comet_ml.config.get_config()
+    COMET_PROJECT_NAME = config.get_string(os.getenv('COMET_PROJECT_NAME'), 'comet.project_name', default='yolov5')
+except (ModuleNotFoundError, ImportError):
+    comet_ml = None
+    COMET_PROJECT_NAME = None
+
+import PIL
+import torch
+import torchvision.transforms as T
+import yaml
+
+from utils.dataloaders import img2label_paths
+from utils.general import check_dataset, scale_boxes, xywh2xyxy
+from utils.metrics import box_iou
+
+COMET_PREFIX = 'comet://'
+
+COMET_MODE = os.getenv('COMET_MODE', 'online')
+
+# Model Saving Settings
+COMET_MODEL_NAME = os.getenv('COMET_MODEL_NAME', 'yolov5')
+
+# Dataset Artifact Settings
+COMET_UPLOAD_DATASET = os.getenv('COMET_UPLOAD_DATASET', 'false').lower() == 'true'
+
+# Evaluation Settings
+COMET_LOG_CONFUSION_MATRIX = os.getenv('COMET_LOG_CONFUSION_MATRIX', 'true').lower() == 'true'
+COMET_LOG_PREDICTIONS = os.getenv('COMET_LOG_PREDICTIONS', 'true').lower() == 'true'
+COMET_MAX_IMAGE_UPLOADS = int(os.getenv('COMET_MAX_IMAGE_UPLOADS', 100))
+
+# Confusion Matrix Settings
+CONF_THRES = float(os.getenv('CONF_THRES', 0.001))
+IOU_THRES = float(os.getenv('IOU_THRES', 0.6))
+
+# Batch Logging Settings
+COMET_LOG_BATCH_METRICS = os.getenv('COMET_LOG_BATCH_METRICS', 'false').lower() == 'true'
+COMET_BATCH_LOGGING_INTERVAL = os.getenv('COMET_BATCH_LOGGING_INTERVAL', 1)
+COMET_PREDICTION_LOGGING_INTERVAL = os.getenv('COMET_PREDICTION_LOGGING_INTERVAL', 1)
+COMET_LOG_PER_CLASS_METRICS = os.getenv('COMET_LOG_PER_CLASS_METRICS', 'false').lower() == 'true'
+
+RANK = int(os.getenv('RANK', -1))
+
+to_pil = T.ToPILImage()
+
+
+class CometLogger:
+    """Log metrics, parameters, source code, models and much more
+    with Comet
+    """
+
+    def __init__(self, opt, hyp, run_id=None, job_type='Training', **experiment_kwargs) -> None:
+        self.job_type = job_type
+        self.opt = opt
+        self.hyp = hyp
+
+        # Comet Flags
+        self.comet_mode = COMET_MODE
+
+        self.save_model = opt.save_period > -1
+        self.model_name = COMET_MODEL_NAME
+
+        # Batch Logging Settings
+        self.log_batch_metrics = COMET_LOG_BATCH_METRICS
+        self.comet_log_batch_interval = COMET_BATCH_LOGGING_INTERVAL
+
+        # Dataset Artifact Settings
+        self.upload_dataset = self.opt.upload_dataset if self.opt.upload_dataset else COMET_UPLOAD_DATASET
+        self.resume = self.opt.resume
+
+        # Default parameters to pass to Experiment objects
+        self.default_experiment_kwargs = {
+            'log_code': False,
+            'log_env_gpu': True,
+            'log_env_cpu': True,
+            'project_name': COMET_PROJECT_NAME,}
+        self.default_experiment_kwargs.update(experiment_kwargs)
+        self.experiment = self._get_experiment(self.comet_mode, run_id)
+
+        self.data_dict = self.check_dataset(self.opt.data)
+        self.class_names = self.data_dict['names']
+        self.num_classes = self.data_dict['nc']
+
+        self.logged_images_count = 0
+        self.max_images = COMET_MAX_IMAGE_UPLOADS
+
+        if run_id is None:
+            self.experiment.log_other('Created from', 'YOLOv5')
+            if not isinstance(self.experiment, comet_ml.OfflineExperiment):
+                workspace, project_name, experiment_id = self.experiment.url.split('/')[-3:]
+                self.experiment.log_other(
+                    'Run Path',
+                    f'{workspace}/{project_name}/{experiment_id}',
+                )
+            self.log_parameters(vars(opt))
+            self.log_parameters(self.opt.hyp)
+            self.log_asset_data(
+                self.opt.hyp,
+                name='hyperparameters.json',
+                metadata={'type': 'hyp-config-file'},
+            )
+            self.log_asset(
+                f'{self.opt.save_dir}/opt.yaml',
+                metadata={'type': 'opt-config-file'},
+            )
+
+        self.comet_log_confusion_matrix = COMET_LOG_CONFUSION_MATRIX
+
+        if hasattr(self.opt, 'conf_thres'):
+            self.conf_thres = self.opt.conf_thres
+        else:
+            self.conf_thres = CONF_THRES
+        if hasattr(self.opt, 'iou_thres'):
+            self.iou_thres = self.opt.iou_thres
+        else:
+            self.iou_thres = IOU_THRES
+
+        self.log_parameters({'val_iou_threshold': self.iou_thres, 'val_conf_threshold': self.conf_thres})
+
+        self.comet_log_predictions = COMET_LOG_PREDICTIONS
+        if self.opt.bbox_interval == -1:
+            self.comet_log_prediction_interval = 1 if self.opt.epochs < 10 else self.opt.epochs // 10
+        else:
+            self.comet_log_prediction_interval = self.opt.bbox_interval
+
+        if self.comet_log_predictions:
+            self.metadata_dict = {}
+            self.logged_image_names = []
+
+        self.comet_log_per_class_metrics = COMET_LOG_PER_CLASS_METRICS
+
+        self.experiment.log_others({
+            'comet_mode': COMET_MODE,
+            'comet_max_image_uploads': COMET_MAX_IMAGE_UPLOADS,
+            'comet_log_per_class_metrics': COMET_LOG_PER_CLASS_METRICS,
+            'comet_log_batch_metrics': COMET_LOG_BATCH_METRICS,
+            'comet_log_confusion_matrix': COMET_LOG_CONFUSION_MATRIX,
+            'comet_model_name': COMET_MODEL_NAME,})
+
+        # Check if running the Experiment with the Comet Optimizer
+        if hasattr(self.opt, 'comet_optimizer_id'):
+            self.experiment.log_other('optimizer_id', self.opt.comet_optimizer_id)
+            self.experiment.log_other('optimizer_objective', self.opt.comet_optimizer_objective)
+            self.experiment.log_other('optimizer_metric', self.opt.comet_optimizer_metric)
+            self.experiment.log_other('optimizer_parameters', json.dumps(self.hyp))
+
+    def _get_experiment(self, mode, experiment_id=None):
+        if mode == 'offline':
+            if experiment_id is not None:
+                return comet_ml.ExistingOfflineExperiment(
+                    previous_experiment=experiment_id,
+                    **self.default_experiment_kwargs,
+                )
+
+            return comet_ml.OfflineExperiment(**self.default_experiment_kwargs,)
+
+        else:
+            try:
+                if experiment_id is not None:
+                    return comet_ml.ExistingExperiment(
+                        previous_experiment=experiment_id,
+                        **self.default_experiment_kwargs,
+                    )
+
+                return comet_ml.Experiment(**self.default_experiment_kwargs)
+
+            except ValueError:
+                logger.warning('COMET WARNING: '
+                               'Comet credentials have not been set. '
+                               'Comet will default to offline logging. '
+                               'Please set your credentials to enable online logging.')
+                return self._get_experiment('offline', experiment_id)
+
+        return
+
+    def log_metrics(self, log_dict, **kwargs):
+        self.experiment.log_metrics(log_dict, **kwargs)
+
+    def log_parameters(self, log_dict, **kwargs):
+        self.experiment.log_parameters(log_dict, **kwargs)
+
+    def log_asset(self, asset_path, **kwargs):
+        self.experiment.log_asset(asset_path, **kwargs)
+
+    def log_asset_data(self, asset, **kwargs):
+        self.experiment.log_asset_data(asset, **kwargs)
+
+    def log_image(self, img, **kwargs):
+        self.experiment.log_image(img, **kwargs)
+
+    def log_model(self, path, opt, epoch, fitness_score, best_model=False):
+        if not self.save_model:
+            return
+
+        model_metadata = {
+            'fitness_score': fitness_score[-1],
+            'epochs_trained': epoch + 1,
+            'save_period': opt.save_period,
+            'total_epochs': opt.epochs,}
+
+        model_files = glob.glob(f'{path}/*.pt')
+        for model_path in model_files:
+            name = Path(model_path).name
+
+            self.experiment.log_model(
+                self.model_name,
+                file_or_folder=model_path,
+                file_name=name,
+                metadata=model_metadata,
+                overwrite=True,
+            )
+
+    def check_dataset(self, data_file):
+        with open(data_file) as f:
+            data_config = yaml.safe_load(f)
+
+        if data_config['path'].startswith(COMET_PREFIX):
+            path = data_config['path'].replace(COMET_PREFIX, '')
+            data_dict = self.download_dataset_artifact(path)
+
+            return data_dict
+
+        self.log_asset(self.opt.data, metadata={'type': 'data-config-file'})
+
+        return check_dataset(data_file)
+
+    def log_predictions(self, image, labelsn, path, shape, predn):
+        if self.logged_images_count >= self.max_images:
+            return
+        detections = predn[predn[:, 4] > self.conf_thres]
+        iou = box_iou(labelsn[:, 1:], detections[:, :4])
+        mask, _ = torch.where(iou > self.iou_thres)
+        if len(mask) == 0:
+            return
+
+        filtered_detections = detections[mask]
+        filtered_labels = labelsn[mask]
+
+        image_id = path.split('/')[-1].split('.')[0]
+        image_name = f'{image_id}_curr_epoch_{self.experiment.curr_epoch}'
+        if image_name not in self.logged_image_names:
+            native_scale_image = PIL.Image.open(path)
+            self.log_image(native_scale_image, name=image_name)
+            self.logged_image_names.append(image_name)
+
+        metadata = []
+        for cls, *xyxy in filtered_labels.tolist():
+            metadata.append({
+                'label': f'{self.class_names[int(cls)]}-gt',
+                'score': 100,
+                'box': {
+                    'x': xyxy[0],
+                    'y': xyxy[1],
+                    'x2': xyxy[2],
+                    'y2': xyxy[3]},})
+        for *xyxy, conf, cls in filtered_detections.tolist():
+            metadata.append({
+                'label': f'{self.class_names[int(cls)]}',
+                'score': conf * 100,
+                'box': {
+                    'x': xyxy[0],
+                    'y': xyxy[1],
+                    'x2': xyxy[2],
+                    'y2': xyxy[3]},})
+
+        self.metadata_dict[image_name] = metadata
+        self.logged_images_count += 1
+
+        return
+
+    def preprocess_prediction(self, image, labels, shape, pred):
+        nl, _ = labels.shape[0], pred.shape[0]
+
+        # Predictions
+        if self.opt.single_cls:
+            pred[:, 5] = 0
+
+        predn = pred.clone()
+        scale_boxes(image.shape[1:], predn[:, :4], shape[0], shape[1])
+
+        labelsn = None
+        if nl:
+            tbox = xywh2xyxy(labels[:, 1:5])  # target boxes
+            scale_boxes(image.shape[1:], tbox, shape[0], shape[1])  # native-space labels
+            labelsn = torch.cat((labels[:, 0:1], tbox), 1)  # native-space labels
+            scale_boxes(image.shape[1:], predn[:, :4], shape[0], shape[1])  # native-space pred
+
+        return predn, labelsn
+
+    def add_assets_to_artifact(self, artifact, path, asset_path, split):
+        img_paths = sorted(glob.glob(f'{asset_path}/*'))
+        label_paths = img2label_paths(img_paths)
+
+        for image_file, label_file in zip(img_paths, label_paths):
+            image_logical_path, label_logical_path = map(lambda x: os.path.relpath(x, path), [image_file, label_file])
+
+            try:
+                artifact.add(image_file, logical_path=image_logical_path, metadata={'split': split})
+                artifact.add(label_file, logical_path=label_logical_path, metadata={'split': split})
+            except ValueError as e:
+                logger.error('COMET ERROR: Error adding file to Artifact. Skipping file.')
+                logger.error(f'COMET ERROR: {e}')
+                continue
+
+        return artifact
+
+    def upload_dataset_artifact(self):
+        dataset_name = self.data_dict.get('dataset_name', 'yolov5-dataset')
+        path = str((ROOT / Path(self.data_dict['path'])).resolve())
+
+        metadata = self.data_dict.copy()
+        for key in ['train', 'val', 'test']:
+            split_path = metadata.get(key)
+            if split_path is not None:
+                metadata[key] = split_path.replace(path, '')
+
+        artifact = comet_ml.Artifact(name=dataset_name, artifact_type='dataset', metadata=metadata)
+        for key in metadata.keys():
+            if key in ['train', 'val', 'test']:
+                if isinstance(self.upload_dataset, str) and (key != self.upload_dataset):
+                    continue
+
+                asset_path = self.data_dict.get(key)
+                if asset_path is not None:
+                    artifact = self.add_assets_to_artifact(artifact, path, asset_path, key)
+
+        self.experiment.log_artifact(artifact)
+
+        return
+
+    def download_dataset_artifact(self, artifact_path):
+        logged_artifact = self.experiment.get_artifact(artifact_path)
+        artifact_save_dir = str(Path(self.opt.save_dir) / logged_artifact.name)
+        logged_artifact.download(artifact_save_dir)
+
+        metadata = logged_artifact.metadata
+        data_dict = metadata.copy()
+        data_dict['path'] = artifact_save_dir
+
+        metadata_names = metadata.get('names')
+        if type(metadata_names) == dict:
+            data_dict['names'] = {int(k): v for k, v in metadata.get('names').items()}
+        elif type(metadata_names) == list:
+            data_dict['names'] = {int(k): v for k, v in zip(range(len(metadata_names)), metadata_names)}
+        else:
+            raise "Invalid 'names' field in dataset yaml file. Please use a list or dictionary"
+
+        data_dict = self.update_data_paths(data_dict)
+        return data_dict
+
+    def update_data_paths(self, data_dict):
+        path = data_dict.get('path', '')
+
+        for split in ['train', 'val', 'test']:
+            if data_dict.get(split):
+                split_path = data_dict.get(split)
+                data_dict[split] = (f'{path}/{split_path}' if isinstance(split, str) else [
+                    f'{path}/{x}' for x in split_path])
+
+        return data_dict
+
+    def on_pretrain_routine_end(self, paths):
+        if self.opt.resume:
+            return
+
+        for path in paths:
+            self.log_asset(str(path))
+
+        if self.upload_dataset:
+            if not self.resume:
+                self.upload_dataset_artifact()
+
+        return
+
+    def on_train_start(self):
+        self.log_parameters(self.hyp)
+
+    def on_train_epoch_start(self):
+        return
+
+    def on_train_epoch_end(self, epoch):
+        self.experiment.curr_epoch = epoch
+
+        return
+
+    def on_train_batch_start(self):
+        return
+
+    def on_train_batch_end(self, log_dict, step):
+        self.experiment.curr_step = step
+        if self.log_batch_metrics and (step % self.comet_log_batch_interval == 0):
+            self.log_metrics(log_dict, step=step)
+
+        return
+
+    def on_train_end(self, files, save_dir, last, best, epoch, results):
+        if self.comet_log_predictions:
+            curr_epoch = self.experiment.curr_epoch
+            self.experiment.log_asset_data(self.metadata_dict, 'image-metadata.json', epoch=curr_epoch)
+
+        for f in files:
+            self.log_asset(f, metadata={'epoch': epoch})
+        self.log_asset(f'{save_dir}/results.csv', metadata={'epoch': epoch})
+
+        if not self.opt.evolve:
+            model_path = str(best if best.exists() else last)
+            name = Path(model_path).name
+            if self.save_model:
+                self.experiment.log_model(
+                    self.model_name,
+                    file_or_folder=model_path,
+                    file_name=name,
+                    overwrite=True,
+                )
+
+        # Check if running Experiment with Comet Optimizer
+        if hasattr(self.opt, 'comet_optimizer_id'):
+            metric = results.get(self.opt.comet_optimizer_metric)
+            self.experiment.log_other('optimizer_metric_value', metric)
+
+        self.finish_run()
+
+    def on_val_start(self):
+        return
+
+    def on_val_batch_start(self):
+        return
+
+    def on_val_batch_end(self, batch_i, images, targets, paths, shapes, outputs):
+        if not (self.comet_log_predictions and ((batch_i + 1) % self.comet_log_prediction_interval == 0)):
+            return
+
+        for si, pred in enumerate(outputs):
+            if len(pred) == 0:
+                continue
+
+            image = images[si]
+            labels = targets[targets[:, 0] == si, 1:]
+            shape = shapes[si]
+            path = paths[si]
+            predn, labelsn = self.preprocess_prediction(image, labels, shape, pred)
+            if labelsn is not None:
+                self.log_predictions(image, labelsn, path, shape, predn)
+
+        return
+
+    def on_val_end(self, nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix):
+        if self.comet_log_per_class_metrics:
+            if self.num_classes > 1:
+                for i, c in enumerate(ap_class):
+                    class_name = self.class_names[c]
+                    self.experiment.log_metrics(
+                        {
+                            'mAP@.5': ap50[i],
+                            'mAP@.5:.95': ap[i],
+                            'precision': p[i],
+                            'recall': r[i],
+                            'f1': f1[i],
+                            'true_positives': tp[i],
+                            'false_positives': fp[i],
+                            'support': nt[c]},
+                        prefix=class_name)
+
+        if self.comet_log_confusion_matrix:
+            epoch = self.experiment.curr_epoch
+            class_names = list(self.class_names.values())
+            class_names.append('background')
+            num_classes = len(class_names)
+
+            self.experiment.log_confusion_matrix(
+                matrix=confusion_matrix.matrix,
+                max_categories=num_classes,
+                labels=class_names,
+                epoch=epoch,
+                column_label='Actual Category',
+                row_label='Predicted Category',
+                file_name=f'confusion-matrix-epoch-{epoch}.json',
+            )
+
+    def on_fit_epoch_end(self, result, epoch):
+        self.log_metrics(result, epoch=epoch)
+
+    def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
+        if ((epoch + 1) % self.opt.save_period == 0 and not final_epoch) and self.opt.save_period != -1:
+            self.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi)
+
+    def on_params_update(self, params):
+        self.log_parameters(params)
+
+    def finish_run(self):
+        self.experiment.end()
diff --git a/utils/loggers/comet/comet_utils.py b/utils/loggers/comet/comet_utils.py
new file mode 100644
index 000000000000..27600761ad28
--- /dev/null
+++ b/utils/loggers/comet/comet_utils.py
@@ -0,0 +1,150 @@
+import logging
+import os
+from urllib.parse import urlparse
+
+try:
+    import comet_ml
+except (ModuleNotFoundError, ImportError):
+    comet_ml = None
+
+import yaml
+
+logger = logging.getLogger(__name__)
+
+COMET_PREFIX = 'comet://'
+COMET_MODEL_NAME = os.getenv('COMET_MODEL_NAME', 'yolov5')
+COMET_DEFAULT_CHECKPOINT_FILENAME = os.getenv('COMET_DEFAULT_CHECKPOINT_FILENAME', 'last.pt')
+
+
+def download_model_checkpoint(opt, experiment):
+    model_dir = f'{opt.project}/{experiment.name}'
+    os.makedirs(model_dir, exist_ok=True)
+
+    model_name = COMET_MODEL_NAME
+    model_asset_list = experiment.get_model_asset_list(model_name)
+
+    if len(model_asset_list) == 0:
+        logger.error(f'COMET ERROR: No checkpoints found for model name : {model_name}')
+        return
+
+    model_asset_list = sorted(
+        model_asset_list,
+        key=lambda x: x['step'],
+        reverse=True,
+    )
+    logged_checkpoint_map = {asset['fileName']: asset['assetId'] for asset in model_asset_list}
+
+    resource_url = urlparse(opt.weights)
+    checkpoint_filename = resource_url.query
+
+    if checkpoint_filename:
+        asset_id = logged_checkpoint_map.get(checkpoint_filename)
+    else:
+        asset_id = logged_checkpoint_map.get(COMET_DEFAULT_CHECKPOINT_FILENAME)
+        checkpoint_filename = COMET_DEFAULT_CHECKPOINT_FILENAME
+
+    if asset_id is None:
+        logger.error(f'COMET ERROR: Checkpoint {checkpoint_filename} not found in the given Experiment')
+        return
+
+    try:
+        logger.info(f'COMET INFO: Downloading checkpoint {checkpoint_filename}')
+        asset_filename = checkpoint_filename
+
+        model_binary = experiment.get_asset(asset_id, return_type='binary', stream=False)
+        model_download_path = f'{model_dir}/{asset_filename}'
+        with open(model_download_path, 'wb') as f:
+            f.write(model_binary)
+
+        opt.weights = model_download_path
+
+    except Exception as e:
+        logger.warning('COMET WARNING: Unable to download checkpoint from Comet')
+        logger.exception(e)
+
+
+def set_opt_parameters(opt, experiment):
+    """Update the opts Namespace with parameters
+    from Comet's ExistingExperiment when resuming a run
+
+    Args:
+        opt (argparse.Namespace): Namespace of command line options
+        experiment (comet_ml.APIExperiment): Comet API Experiment object
+    """
+    asset_list = experiment.get_asset_list()
+    resume_string = opt.resume
+
+    for asset in asset_list:
+        if asset['fileName'] == 'opt.yaml':
+            asset_id = asset['assetId']
+            asset_binary = experiment.get_asset(asset_id, return_type='binary', stream=False)
+            opt_dict = yaml.safe_load(asset_binary)
+            for key, value in opt_dict.items():
+                setattr(opt, key, value)
+            opt.resume = resume_string
+
+    # Save hyperparameters to YAML file
+    # Necessary to pass checks in training script
+    save_dir = f'{opt.project}/{experiment.name}'
+    os.makedirs(save_dir, exist_ok=True)
+
+    hyp_yaml_path = f'{save_dir}/hyp.yaml'
+    with open(hyp_yaml_path, 'w') as f:
+        yaml.dump(opt.hyp, f)
+    opt.hyp = hyp_yaml_path
+
+
+def check_comet_weights(opt):
+    """Downloads model weights from Comet and updates the
+    weights path to point to saved weights location
+
+    Args:
+        opt (argparse.Namespace): Command Line arguments passed
+            to YOLOv5 training script
+
+    Returns:
+        None/bool: Return True if weights are successfully downloaded
+            else return None
+    """
+    if comet_ml is None:
+        return
+
+    if isinstance(opt.weights, str):
+        if opt.weights.startswith(COMET_PREFIX):
+            api = comet_ml.API()
+            resource = urlparse(opt.weights)
+            experiment_path = f'{resource.netloc}{resource.path}'
+            experiment = api.get(experiment_path)
+            download_model_checkpoint(opt, experiment)
+            return True
+
+    return None
+
+
+def check_comet_resume(opt):
+    """Restores run parameters to its original state based on the model checkpoint
+    and logged Experiment parameters.
+
+    Args:
+        opt (argparse.Namespace): Command Line arguments passed
+            to YOLOv5 training script
+
+    Returns:
+        None/bool: Return True if the run is restored successfully
+            else return None
+    """
+    if comet_ml is None:
+        return
+
+    if isinstance(opt.resume, str):
+        if opt.resume.startswith(COMET_PREFIX):
+            api = comet_ml.API()
+            resource = urlparse(opt.resume)
+            experiment_path = f'{resource.netloc}{resource.path}'
+            experiment = api.get(experiment_path)
+            set_opt_parameters(opt, experiment)
+            download_model_checkpoint(opt, experiment)
+
+            return True
+
+    return None
diff --git a/utils/loggers/comet/hpo.py b/utils/loggers/comet/hpo.py
new file mode 100644
index 000000000000..fc49115c1358
--- /dev/null
+++ b/utils/loggers/comet/hpo.py
@@ -0,0 +1,118 @@
+import argparse
+import json
+import logging
+import os
+import sys
+from pathlib import Path
+
+import comet_ml
+
+logger = logging.getLogger(__name__)
+
+FILE = Path(__file__).resolve()
+ROOT = FILE.parents[3]  # YOLOv5 root directory
+if str(ROOT) not in sys.path:
+    sys.path.append(str(ROOT))  # add ROOT to PATH
+
+from train import train
+from utils.callbacks import Callbacks
+from utils.general import increment_path
+from utils.torch_utils import select_device
+
+# Project Configuration
+config = comet_ml.config.get_config()
+COMET_PROJECT_NAME = config.get_string(os.getenv('COMET_PROJECT_NAME'), 'comet.project_name', default='yolov5')
+
+
+def get_args(known=False):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path')
+    parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
+    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
+    parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
+    parser.add_argument('--epochs', type=int, default=300, help='total training epochs')
+    parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')
+    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
+    parser.add_argument('--rect', action='store_true', help='rectangular training')
+    parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
+    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
+    parser.add_argument('--noval', action='store_true', help='only validate final epoch')
+    parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor')
+    parser.add_argument('--noplots', action='store_true', help='save no plot files')
+    parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
+    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
+    parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
+    parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
+    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
+    parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
+    parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
+    parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer')
+    parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
+    parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
+    parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name')
+    parser.add_argument('--name', default='exp', help='save to project/name')
+    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
+    parser.add_argument('--quad', action='store_true', help='quad dataloader')
+    parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler')
+    parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
+    parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
+    parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
+    parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
+    parser.add_argument('--seed', type=int, default=0, help='Global training seed')
+    parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
+
+    # Weights & Biases arguments
+    parser.add_argument('--entity', default=None, help='W&B: Entity')
+    parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option')
+    parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
+    parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use')
+
+    # Comet Arguments
+    parser.add_argument('--comet_optimizer_config', type=str, help='Comet: Path to a Comet Optimizer Config File.')
+    parser.add_argument('--comet_optimizer_id', type=str, help='Comet: ID of the Comet Optimizer sweep.')
+    parser.add_argument('--comet_optimizer_objective', type=str, help="Comet: Set to 'minimize' or 'maximize'.")
+    parser.add_argument('--comet_optimizer_metric', type=str, help='Comet: Metric to Optimize.')
+    parser.add_argument('--comet_optimizer_workers',
+                        type=int,
+                        default=1,
+                        help='Comet: Number of Parallel Workers to use with the Comet Optimizer.')
+
+    return parser.parse_known_args()[0] if known else parser.parse_args()
+
+
+def run(parameters, opt):
+    hyp_dict = {k: v for k, v in parameters.items() if k not in ['epochs', 'batch_size']}
+
+    opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve))
+    opt.batch_size = parameters.get('batch_size')
+    opt.epochs = parameters.get('epochs')
+
+    device = select_device(opt.device, batch_size=opt.batch_size)
+    train(hyp_dict, opt, device, callbacks=Callbacks())
+
+
+if __name__ == '__main__':
+    opt = get_args(known=True)
+
+    opt.weights = str(opt.weights)
+    opt.cfg = str(opt.cfg)
+    opt.data = str(opt.data)
+    opt.project = str(opt.project)
+
+    optimizer_id = os.getenv('COMET_OPTIMIZER_ID')
+    if optimizer_id is None:
+        with open(opt.comet_optimizer_config) as f:
+            optimizer_config = json.load(f)
+        optimizer = comet_ml.Optimizer(optimizer_config)
+    else:
+        optimizer = comet_ml.Optimizer(optimizer_id)
+
+    opt.comet_optimizer_id = optimizer.id
+    status = optimizer.status()
+
+    opt.comet_optimizer_objective = status['spec']['objective']
+    opt.comet_optimizer_metric = status['spec']['metric']
+
+    logger.info('COMET INFO: Starting Hyperparameter Sweep')
+    for parameter in optimizer.get_parameters():
+        run(parameter['parameters'], opt)
diff --git a/utils/loggers/comet/optimizer_config.json b/utils/loggers/comet/optimizer_config.json
new file mode 100644
index 000000000000..83ddddab6f20
--- /dev/null
+++ b/utils/loggers/comet/optimizer_config.json
@@ -0,0 +1,209 @@
+{
+  "algorithm": "random",
+  "parameters": {
+    "anchor_t": {
+      "type": "discrete",
+      "values": [
+        2,
+        8
+      ]
+    },
+    "batch_size": {
+      "type": "discrete",
+      "values": [
+        16,
+        32,
+        64
+      ]
+    },
+    "box": {
+      "type": "discrete",
+      "values": [
+        0.02,
+        0.2
+      ]
+    },
+    "cls": {
+      "type": "discrete",
+      "values": [
+        0.2
+      ]
+    },
+    "cls_pw": {
+      "type": "discrete",
+      "values": [
+        0.5
+      ]
+    },
+    "copy_paste": {
+      "type": "discrete",
+      "values": [
+        1
+      ]
+    },
+    "degrees": {
+      "type": "discrete",
+      "values": [
+        0,
+        45
+      ]
+    },
+    "epochs": {
+      "type": "discrete",
+      "values": [
+        5
+      ]
+    },
+    "fl_gamma": {
+      "type": "discrete",
+      "values": [
+        0
+      ]
+    },
+    "fliplr": {
+      "type": "discrete",
+      "values": [
+        0
+      ]
+    },
+    "flipud": {
+      "type": "discrete",
+      "values": [
+        0
+      ]
+    },
+    "hsv_h": {
+      "type": "discrete",
+      "values": [
+        0
+      ]
+    },
+    "hsv_s": {
+      "type": "discrete",
+      "values": [
+        0
+      ]
+    },
+    "hsv_v": {
+      "type": "discrete",
+      "values": [
+        0
+      ]
+    },
+    "iou_t": {
+      "type": "discrete",
+      "values": [
+        0.7
+      ]
+    },
+    "lr0": {
+      "type": "discrete",
+      "values": [
+        1e-05,
+        0.1
+      ]
+    },
+    "lrf": {
+      "type": "discrete",
+      "values": [
+        0.01,
+        1
+      ]
+    },
+    "mixup": {
+      "type": "discrete",
+      "values": [
+        1
+      ]
+    },
+    "momentum": {
+      "type": "discrete",
+      "values": [
+        0.6
+      ]
+    },
+    "mosaic": {
+      "type": "discrete",
+      "values": [
+        0
+      ]
+    },
+    "obj": {
+      "type": "discrete",
+      "values": [
+        0.2
+      ]
+    },
+    "obj_pw": {
+      "type": "discrete",
+      "values": [
+        0.5
+      ]
+    },
+    "optimizer": {
+      "type": "categorical",
+      "values": [
+        "SGD",
+        "Adam",
+        "AdamW"
+      ]
+    },
+    "perspective": {
+      "type": "discrete",
+      "values": [
+        0
+      ]
+    },
+    "scale": {
+      "type": "discrete",
+      "values": [
+        0
+      ]
+    },
+    "shear": {
+      "type": "discrete",
+      "values": [
+        0
+      ]
+    },
+    "translate": {
+      "type": "discrete",
+      "values": [
+        0
+      ]
+    },
+    "warmup_bias_lr": {
+      "type": "discrete",
+      "values": [
+        0,
+        0.2
+      ]
+    },
+    "warmup_epochs": {
+      "type": "discrete",
+      "values": [
+        5
+      ]
+    },
+    "warmup_momentum": {
+      "type": "discrete",
+      "values": [
+        0,
+        0.95
+      ]
+    },
+    "weight_decay": {
+      "type": "discrete",
+      "values": [
+        0,
+        0.001
+      ]
+    }
+  },
+  "spec": {
+    "maxCombo": 0,
+    "metric": "metrics/mAP_0.5",
+    "objective": "maximize"
+  },
+  "trials": 1
+}
diff --git a/utils/loss.py b/utils/loss.py
index 9e78df17fdf3..9b9c3d9f8018 100644
--- a/utils/loss.py
+++ b/utils/loss.py
@@ -1,10 +1,13 @@
-# Loss functions
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Loss functions
+"""
 
 import torch
 import torch.nn as nn
 
-from utils.general import bbox_iou
-from utils.torch_utils import is_parallel
+from utils.metrics import bbox_iou
+from utils.torch_utils import de_parallel
 
 
 def smooth_BCE(eps=0.1):  # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441
@@ -15,7 +18,7 @@ def smooth_BCE(eps=0.1):  # https://github.com/ultralytics/yolov3/issues/238#iss
 class BCEBlurWithLogitsLoss(nn.Module):
     # BCEwithLogitLoss() with reduced missing label effects.
     def __init__(self, alpha=0.05):
-        super(BCEBlurWithLogitsLoss, self).__init__()
+        super().__init__()
         self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none')  # must be nn.BCEWithLogitsLoss()
         self.alpha = alpha
 
@@ -32,7 +35,7 @@ def forward(self, pred, true):
 class FocalLoss(nn.Module):
     # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
     def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
-        super(FocalLoss, self).__init__()
+        super().__init__()
         self.loss_fcn = loss_fcn  # must be nn.BCEWithLogitsLoss()
         self.gamma = gamma
         self.alpha = alpha
@@ -62,7 +65,7 @@ def forward(self, pred, true):
 class QFocalLoss(nn.Module):
     # Wraps Quality focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
     def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
-        super(QFocalLoss, self).__init__()
+        super().__init__()
         self.loss_fcn = loss_fcn  # must be nn.BCEWithLogitsLoss()
         self.gamma = gamma
         self.alpha = alpha
@@ -86,9 +89,10 @@ def forward(self, pred, true):
 
 
 class ComputeLoss:
+    sort_obj_iou = False
+
     # Compute losses
     def __init__(self, model, autobalance=False):
-        super(ComputeLoss, self).__init__()
         device = next(model.parameters()).device  # get model device
         h = model.hyp  # hyperparameters
 
@@ -104,42 +108,53 @@ def __init__(self, model, autobalance=False):
         if g > 0:
             BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)
 
-        det = model.module.model[-1] if is_parallel(model) else model.model[-1]  # Detect() module
-        self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, .02])  # P3-P7
-        self.ssi = list(det.stride).index(16) if autobalance else 0  # stride 16 index
-        self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, model.gr, h, autobalance
-        for k in 'na', 'nc', 'nl', 'anchors':
-            setattr(self, k, getattr(det, k))
-
-    def __call__(self, p, targets):  # predictions, targets, model
-        device = targets.device
-        lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device)
+        m = de_parallel(model).model[-1]  # Detect() module
+        self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02])  # P3-P7
+        self.ssi = list(m.stride).index(16) if autobalance else 0  # stride 16 index
+        self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance
+        self.na = m.na  # number of anchors
+        self.nc = m.nc  # number of classes
+        self.nl = m.nl  # number of layers
+        self.anchors = m.anchors
+        self.device = device
+
+    def __call__(self, p, targets):  # predictions, targets
+        lcls = torch.zeros(1, device=self.device)  # class loss
+        lbox = torch.zeros(1, device=self.device)  # box loss
+        lobj = torch.zeros(1, device=self.device)  # object loss
         tcls, tbox, indices, anchors = self.build_targets(p, targets)  # targets
 
         # Losses
         for i, pi in enumerate(p):  # layer index, layer predictions
             b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
-            tobj = torch.zeros_like(pi[..., 0], device=device)  # target obj
+            tobj = torch.zeros(pi.shape[:4], dtype=pi.dtype, device=self.device)  # target obj
 
             n = b.shape[0]  # number of targets
             if n:
-                ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets
+                # pxy, pwh, _, pcls = pi[b, a, gj, gi].tensor_split((2, 4, 5), dim=1)  # faster, requires torch 1.8.0
+                pxy, pwh, _, pcls = pi[b, a, gj, gi].split((2, 2, 1, self.nc), 1)  # target-subset of predictions
 
                 # Regression
-                pxy = ps[:, :2].sigmoid() * 2. - 0.5
-                pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
+                pxy = pxy.sigmoid() * 2 - 0.5
+                pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i]
                 pbox = torch.cat((pxy, pwh), 1)  # predicted box
-                iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True)  # iou(prediction, target)
+                iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze()  # iou(prediction, target)
                 lbox += (1.0 - iou).mean()  # iou loss
 
                 # Objectness
-                tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * iou.detach().clamp(0).type(tobj.dtype)  # iou ratio
+                iou = iou.detach().clamp(0).type(tobj.dtype)
+                if self.sort_obj_iou:
+                    j = iou.argsort()
+                    b, a, gj, gi, iou = b[j], a[j], gj[j], gi[j], iou[j]
+                if self.gr < 1:
+                    iou = (1.0 - self.gr) + self.gr * iou
+                tobj[b, a, gj, gi] = iou  # iou ratio
 
                 # Classification
                 if self.nc > 1:  # cls loss (only if multiple classes)
-                    t = torch.full_like(ps[:, 5:], self.cn, device=device)  # targets
+                    t = torch.full_like(pcls, self.cn, device=self.device)  # targets
                     t[range(n), tcls[i]] = self.cp
-                    lcls += self.BCEcls(ps[:, 5:], t)  # BCE
+                    lcls += self.BCEcls(pcls, t)  # BCE
 
                 # Append targets to text file
                 # with open('targets.txt', 'a') as file:
@@ -157,41 +172,46 @@ def __call__(self, p, targets):  # predictions, targets, model
         lcls *= self.hyp['cls']
         bs = tobj.shape[0]  # batch size
 
-        loss = lbox + lobj + lcls
-        return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach()
+        return (lbox + lobj + lcls) * bs, torch.cat((lbox, lobj, lcls)).detach()
 
     def build_targets(self, p, targets):
         # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
         na, nt = self.na, targets.shape[0]  # number of anchors, targets
         tcls, tbox, indices, anch = [], [], [], []
-        gain = torch.ones(7, device=targets.device)  # normalized to gridspace gain
-        ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)  # same as .repeat_interleave(nt)
-        targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2)  # append anchor indices
+        gain = torch.ones(7, device=self.device)  # normalized to gridspace gain
+        ai = torch.arange(na, device=self.device).float().view(na, 1).repeat(1, nt)  # same as .repeat_interleave(nt)
+        targets = torch.cat((targets.repeat(na, 1, 1), ai[..., None]), 2)  # append anchor indices
 
         g = 0.5  # bias
-        off = torch.tensor([[0, 0],
-                            [1, 0], [0, 1], [-1, 0], [0, -1],  # j,k,l,m
-                            # [1, 1], [1, -1], [-1, 1], [-1, -1],  # jk,jm,lk,lm
-                            ], device=targets.device).float() * g  # offsets
+        off = torch.tensor(
+            [
+                [0, 0],
+                [1, 0],
+                [0, 1],
+                [-1, 0],
+                [0, -1],  # j,k,l,m
+                # [1, 1], [1, -1], [-1, 1], [-1, -1],  # jk,jm,lk,lm
+            ],
+            device=self.device).float() * g  # offsets
 
         for i in range(self.nl):
-            anchors = self.anchors[i]
-            gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]]  # xyxy gain
+            anchors, shape = self.anchors[i], p[i].shape
+            gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]]  # xyxy gain
 
             # Match targets to anchors
-            t = targets * gain
+            t = targets * gain  # shape(3,n,7)
             if nt:
                 # Matches
-                r = t[:, :, 4:6] / anchors[:, None]  # wh ratio
-                j = torch.max(r, 1. / r).max(2)[0] < self.hyp['anchor_t']  # compare
+                r = t[..., 4:6] / anchors[:, None]  # wh ratio
+                j = torch.max(r, 1 / r).max(2)[0] < self.hyp['anchor_t']  # compare
                 # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
                 t = t[j]  # filter
 
                 # Offsets
                 gxy = t[:, 2:4]  # grid xy
                 gxi = gain[[2, 3]] - gxy  # inverse
-                j, k = ((gxy % 1. < g) & (gxy > 1.)).T
-                l, m = ((gxi % 1. < g) & (gxi > 1.)).T
+                j, k = ((gxy % 1 < g) & (gxy > 1)).T
+                l, m = ((gxi % 1 < g) & (gxi > 1)).T
                 j = torch.stack((torch.ones_like(j), j, k, l, m))
                 t = t.repeat((5, 1, 1))[j]
                 offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
@@ -200,15 +220,13 @@ def build_targets(self, p, targets):
                 offsets = 0
 
             # Define
-            b, c = t[:, :2].long().T  # image, class
-            gxy = t[:, 2:4]  # grid xy
-            gwh = t[:, 4:6]  # grid wh
+            bc, gxy, gwh, a = t.chunk(4, 1)  # (image, class), grid xy, grid wh, anchors
+            a, (b, c) = a.long().view(-1), bc.long().T  # anchors, image, class
             gij = (gxy - offsets).long()
-            gi, gj = gij.T  # grid xy indices
+            gi, gj = gij.T  # grid indices
 
             # Append
-            a = t[:, 6].long()  # anchor indices
-            indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1)))  # image, anchor, grid indices
+            indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1)))  # image, anchor, grid
             tbox.append(torch.cat((gxy - gij, gwh), 1))  # box
             anch.append(anchors[a])  # anchors
             tcls.append(c)  # class
diff --git a/utils/metrics.py b/utils/metrics.py
index 323c84b6c873..95f364c23f34 100644
--- a/utils/metrics.py
+++ b/utils/metrics.py
@@ -1,12 +1,17 @@
-# Model validation metrics
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Model validation metrics
+"""
 
+import math
+import warnings
 from pathlib import Path
 
 import matplotlib.pyplot as plt
 import numpy as np
 import torch
 
-from . import general
+from utils import TryExcept, threaded
 
 
 def fitness(x):
@@ -15,7 +20,15 @@ def fitness(x):
     return (x[:, :4] * w).sum(1)
 
 
-def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=()):
+def smooth(y, f=0.05):
+    # Box filter of fraction f
+    nf = round(len(y) * f * 2) // 2 + 1  # number of filter elements (must be odd)
+    p = np.ones(nf // 2)  # ones padding
+    yp = np.concatenate((p * y[0], y, p * y[-1]), 0)  # y padded
+    return np.convolve(yp, np.ones(nf) / nf, mode='valid')  # y-smoothed
+
+
+def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16, prefix=''):
     """ Compute the average precision, given the recall and precision curves.
     Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
     # Arguments
@@ -34,7 +47,7 @@ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names
     tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
 
     # Find unique classes
-    unique_classes = np.unique(target_cls)
+    unique_classes, nt = np.unique(target_cls, return_counts=True)
     nc = unique_classes.shape[0]  # number of classes, number of detections
 
     # Create Precision-Recall curve and compute AP for each class
@@ -42,40 +55,44 @@ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names
     ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000))
     for ci, c in enumerate(unique_classes):
         i = pred_cls == c
-        n_l = (target_cls == c).sum()  # number of labels
+        n_l = nt[ci]  # number of labels
         n_p = i.sum()  # number of predictions
-
         if n_p == 0 or n_l == 0:
             continue
-        else:
-            # Accumulate FPs and TPs
-            fpc = (1 - tp[i]).cumsum(0)
-            tpc = tp[i].cumsum(0)
 
-            # Recall
-            recall = tpc / (n_l + 1e-16)  # recall curve
-            r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0)  # negative x, xp because xp decreases
+        # Accumulate FPs and TPs
+        fpc = (1 - tp[i]).cumsum(0)
+        tpc = tp[i].cumsum(0)
+
+        # Recall
+        recall = tpc / (n_l + eps)  # recall curve
+        r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0)  # negative x, xp because xp decreases
 
-            # Precision
-            precision = tpc / (tpc + fpc)  # precision curve
-            p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1)  # p at pr_score
+        # Precision
+        precision = tpc / (tpc + fpc)  # precision curve
+        p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1)  # p at pr_score
 
-            # AP from recall-precision curve
-            for j in range(tp.shape[1]):
-                ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
-                if plot and j == 0:
-                    py.append(np.interp(px, mrec, mpre))  # precision at mAP@0.5
+        # AP from recall-precision curve
+        for j in range(tp.shape[1]):
+            ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
+            if plot and j == 0:
+                py.append(np.interp(px, mrec, mpre))  # precision at mAP@0.5
 
     # Compute F1 (harmonic mean of precision and recall)
-    f1 = 2 * p * r / (p + r + 1e-16)
+    f1 = 2 * p * r / (p + r + eps)
+    names = [v for k, v in names.items() if k in unique_classes]  # list: only classes that have data
+    names = dict(enumerate(names))  # to dict
     if plot:
-        plot_pr_curve(px, py, ap, Path(save_dir) / 'PR_curve.png', names)
-        plot_mc_curve(px, f1, Path(save_dir) / 'F1_curve.png', names, ylabel='F1')
-        plot_mc_curve(px, p, Path(save_dir) / 'P_curve.png', names, ylabel='Precision')
-        plot_mc_curve(px, r, Path(save_dir) / 'R_curve.png', names, ylabel='Recall')
+        plot_pr_curve(px, py, ap, Path(save_dir) / f'{prefix}PR_curve.png', names)
+        plot_mc_curve(px, f1, Path(save_dir) / f'{prefix}F1_curve.png', names, ylabel='F1')
+        plot_mc_curve(px, p, Path(save_dir) / f'{prefix}P_curve.png', names, ylabel='Precision')
+        plot_mc_curve(px, r, Path(save_dir) / f'{prefix}R_curve.png', names, ylabel='Recall')
 
-    i = f1.mean(0).argmax()  # max F1 index
-    return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype('int32')
+    i = smooth(f1.mean(0), 0.1).argmax()  # max F1 index
+    p, r, f1 = p[:, i], r[:, i], f1[:, i]
+    tp = (r * nt).round()  # true positives
+    fp = (tp / (p + eps) - tp).round()  # false positives
+    return tp, fp, p, r, f1, ap, unique_classes.astype(int)
 
 
 def compute_ap(recall, precision):
@@ -88,8 +105,8 @@ def compute_ap(recall, precision):
     """
 
     # Append sentinel values to beginning and end
-    mrec = np.concatenate(([0.], recall, [recall[-1] + 0.01]))
-    mpre = np.concatenate(([1.], precision, [0.]))
+    mrec = np.concatenate(([0.0], recall, [1.0]))
+    mpre = np.concatenate(([1.0], precision, [0.0]))
 
     # Compute the precision envelope
     mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))
@@ -124,10 +141,16 @@ def process_batch(self, detections, labels):
         Returns:
             None, updates confusion matrix accordingly
         """
+        if detections is None:
+            gt_classes = labels.int()
+            for gc in gt_classes:
+                self.matrix[self.nc, gc] += 1  # background FN
+            return
+
         detections = detections[detections[:, 4] > self.conf]
         gt_classes = labels[:, 0].int()
         detection_classes = detections[:, 5].int()
-        iou = general.box_iou(labels[:, 1:], detections[:, :4])
+        iou = box_iou(labels[:, 1:], detections[:, :4])
 
         x = torch.where(iou > self.iou_thres)
         if x[0].shape[0]:
@@ -141,49 +164,158 @@ def process_batch(self, detections, labels):
             matches = np.zeros((0, 3))
 
         n = matches.shape[0] > 0
-        m0, m1, _ = matches.transpose().astype(np.int16)
+        m0, m1, _ = matches.transpose().astype(int)
         for i, gc in enumerate(gt_classes):
             j = m0 == i
             if n and sum(j) == 1:
                 self.matrix[detection_classes[m1[j]], gc] += 1  # correct
             else:
-                self.matrix[self.nc, gc] += 1  # background FP
+                self.matrix[self.nc, gc] += 1  # true background
 
         if n:
             for i, dc in enumerate(detection_classes):
                 if not any(m1 == i):
-                    self.matrix[dc, self.nc] += 1  # background FN
-
-    def matrix(self):
-        return self.matrix
-
-    def plot(self, save_dir='', names=()):
-        try:
-            import seaborn as sn
-
-            array = self.matrix / (self.matrix.sum(0).reshape(1, self.nc + 1) + 1E-6)  # normalize
-            array[array < 0.005] = np.nan  # don't annotate (would appear as 0.00)
-
-            fig = plt.figure(figsize=(12, 9), tight_layout=True)
-            sn.set(font_scale=1.0 if self.nc < 50 else 0.8)  # for label size
-            labels = (0 < len(names) < 99) and len(names) == self.nc  # apply names to ticklabels
-            sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap='Blues', fmt='.2f', square=True,
-                       xticklabels=names + ['background FP'] if labels else "auto",
-                       yticklabels=names + ['background FN'] if labels else "auto").set_facecolor((1, 1, 1))
-            fig.axes[0].set_xlabel('True')
-            fig.axes[0].set_ylabel('Predicted')
-            fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250)
-        except Exception as e:
-            pass
+                    self.matrix[dc, self.nc] += 1  # predicted background
+
+    def tp_fp(self):
+        tp = self.matrix.diagonal()  # true positives
+        fp = self.matrix.sum(1) - tp  # false positives
+        # fn = self.matrix.sum(0) - tp  # false negatives (missed detections)
+        return tp[:-1], fp[:-1]  # remove background class
+
+    @TryExcept('WARNING ⚠️ ConfusionMatrix plot failure')
+    def plot(self, normalize=True, save_dir='', names=()):
+        import seaborn as sn
+
+        array = self.matrix / ((self.matrix.sum(0).reshape(1, -1) + 1E-9) if normalize else 1)  # normalize columns
+        array[array < 0.005] = np.nan  # don't annotate (would appear as 0.00)
+
+        fig, ax = plt.subplots(1, 1, figsize=(12, 9), tight_layout=True)
+        nc, nn = self.nc, len(names)  # number of classes, names
+        sn.set(font_scale=1.0 if nc < 50 else 0.8)  # for label size
+        labels = (0 < nn < 99) and (nn == nc)  # apply names to ticklabels
+        ticklabels = (names + ['background']) if labels else 'auto'
+        with warnings.catch_warnings():
+            warnings.simplefilter('ignore')  # suppress empty matrix RuntimeWarning: All-NaN slice encountered
+            sn.heatmap(array,
+                       ax=ax,
+                       annot=nc < 30,
+                       annot_kws={
+                           'size': 8},
+                       cmap='Blues',
+                       fmt='.2f',
+                       square=True,
+                       vmin=0.0,
+                       xticklabels=ticklabels,
+                       yticklabels=ticklabels).set_facecolor((1, 1, 1))
+        ax.set_xlabel('True')
+        ax.set_ylabel('Predicted')
+        ax.set_title('Confusion Matrix')
+        fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250)
+        plt.close(fig)
 
     def print(self):
         for i in range(self.nc + 1):
             print(' '.join(map(str, self.matrix[i])))
 
 
+def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
+    # Returns Intersection over Union (IoU) of box1(1,4) to box2(n,4)
+
+    # Get the coordinates of bounding boxes
+    if xywh:  # transform from xywh to xyxy
+        (x1, y1, w1, h1), (x2, y2, w2, h2) = box1.chunk(4, -1), box2.chunk(4, -1)
+        w1_, h1_, w2_, h2_ = w1 / 2, h1 / 2, w2 / 2, h2 / 2
+        b1_x1, b1_x2, b1_y1, b1_y2 = x1 - w1_, x1 + w1_, y1 - h1_, y1 + h1_
+        b2_x1, b2_x2, b2_y1, b2_y2 = x2 - w2_, x2 + w2_, y2 - h2_, y2 + h2_
+    else:  # x1, y1, x2, y2 = box1
+        b1_x1, b1_y1, b1_x2, b1_y2 = box1.chunk(4, -1)
+        b2_x1, b2_y1, b2_x2, b2_y2 = box2.chunk(4, -1)
+        w1, h1 = b1_x2 - b1_x1, (b1_y2 - b1_y1).clamp(eps)
+        w2, h2 = b2_x2 - b2_x1, (b2_y2 - b2_y1).clamp(eps)
+
+    # Intersection area
+    inter = (b1_x2.minimum(b2_x2) - b1_x1.maximum(b2_x1)).clamp(0) * \
+            (b1_y2.minimum(b2_y2) - b1_y1.maximum(b2_y1)).clamp(0)
+
+    # Union Area
+    union = w1 * h1 + w2 * h2 - inter + eps
+
+    # IoU
+    iou = inter / union
+    if CIoU or DIoU or GIoU:
+        cw = b1_x2.maximum(b2_x2) - b1_x1.minimum(b2_x1)  # convex (smallest enclosing box) width
+        ch = b1_y2.maximum(b2_y2) - b1_y1.minimum(b2_y1)  # convex height
+        if CIoU or DIoU:  # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
+            c2 = cw ** 2 + ch ** 2 + eps  # convex diagonal squared
+            rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4  # center dist ** 2
+            if CIoU:  # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
+                v = (4 / math.pi ** 2) * (torch.atan(w2 / h2) - torch.atan(w1 / h1)).pow(2)
+                with torch.no_grad():
+                    alpha = v / (v - iou + (1 + eps))
+                return iou - (rho2 / c2 + v * alpha)  # CIoU
+            return iou - rho2 / c2  # DIoU
+        c_area = cw * ch + eps  # convex area
+        return iou - (c_area - union) / c_area  # GIoU https://arxiv.org/pdf/1902.09630.pdf
+    return iou  # IoU
+
+
+def box_iou(box1, box2, eps=1e-7):
+    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
+    """
+    Return intersection-over-union (Jaccard index) of boxes.
+    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
+    Arguments:
+        box1 (Tensor[N, 4])
+        box2 (Tensor[M, 4])
+    Returns:
+        iou (Tensor[N, M]): the NxM matrix containing the pairwise
+            IoU values for every element in boxes1 and boxes2
+    """
+
+    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
+    (a1, a2), (b1, b2) = box1.unsqueeze(1).chunk(2, 2), box2.unsqueeze(0).chunk(2, 2)
+    inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2)
+
+    # IoU = inter / (area1 + area2 - inter)
+    return inter / ((a2 - a1).prod(2) + (b2 - b1).prod(2) - inter + eps)
+
+
+def bbox_ioa(box1, box2, eps=1e-7):
+    """ Returns the intersection over box2 area given box1, box2. Boxes are x1y1x2y2
+    box1:       np.array of shape(4)
+    box2:       np.array of shape(nx4)
+    returns:    np.array of shape(n)
+    """
+
+    # Get the coordinates of bounding boxes
+    b1_x1, b1_y1, b1_x2, b1_y2 = box1
+    b2_x1, b2_y1, b2_x2, b2_y2 = box2.T
+
+    # Intersection area
+    inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
+                 (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)
+
+    # box2 area
+    box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + eps
+
+    # Intersection over box2 area
+    return inter_area / box2_area
+
+
+def wh_iou(wh1, wh2, eps=1e-7):
+    # Returns the nxm IoU matrix. wh1 is nx2, wh2 is mx2
+    wh1 = wh1[:, None]  # [N,1,2]
+    wh2 = wh2[None]  # [1,M,2]
+    inter = torch.min(wh1, wh2).prod(2)  # [N,M]
+    return inter / (wh1.prod(2) + wh2.prod(2) - inter + eps)  # iou = inter / (area1 + area2 - inter)
+
+
 # Plots ----------------------------------------------------------------------------------------------------------------
 
-def plot_pr_curve(px, py, ap, save_dir='pr_curve.png', names=()):
+
+@threaded
+def plot_pr_curve(px, py, ap, save_dir=Path('pr_curve.png'), names=()):
     # Precision-recall curve
     fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
     py = np.stack(py, axis=1)
@@ -199,11 +331,14 @@ def plot_pr_curve(px, py, ap, save_dir='pr_curve.png', names=()):
     ax.set_ylabel('Precision')
     ax.set_xlim(0, 1)
     ax.set_ylim(0, 1)
-    plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
-    fig.savefig(Path(save_dir), dpi=250)
+    ax.legend(bbox_to_anchor=(1.04, 1), loc='upper left')
+    ax.set_title('Precision-Recall Curve')
+    fig.savefig(save_dir, dpi=250)
+    plt.close(fig)
 
 
-def plot_mc_curve(px, py, save_dir='mc_curve.png', names=(), xlabel='Confidence', ylabel='Metric'):
+@threaded
+def plot_mc_curve(px, py, save_dir=Path('mc_curve.png'), names=(), xlabel='Confidence', ylabel='Metric'):
     # Metric-confidence curve
     fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
 
@@ -213,11 +348,13 @@ def plot_mc_curve(px, py, save_dir='mc_curve.png', names=(), xlabel='Confidence'
     else:
         ax.plot(px, py.T, linewidth=1, color='grey')  # plot(confidence, metric)
 
-    y = py.mean(0)
+    y = smooth(py.mean(0), 0.05)
     ax.plot(px, y, linewidth=3, color='blue', label=f'all classes {y.max():.2f} at {px[y.argmax()]:.3f}')
     ax.set_xlabel(xlabel)
     ax.set_ylabel(ylabel)
     ax.set_xlim(0, 1)
     ax.set_ylim(0, 1)
-    plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
-    fig.savefig(Path(save_dir), dpi=250)
+    ax.legend(bbox_to_anchor=(1.04, 1), loc='upper left')
+    ax.set_title(f'{ylabel}-Confidence Curve')
+    fig.savefig(save_dir, dpi=250)
+    plt.close(fig)
diff --git a/utils/plots.py b/utils/plots.py
index f24513c6998d..24c618c80b59 100644
--- a/utils/plots.py
+++ b/utils/plots.py
@@ -1,37 +1,197 @@
-# Plotting utils
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Plotting utils
+"""
 
-import glob
+import contextlib
 import math
 import os
-import random
 from copy import copy
 from pathlib import Path
+from urllib.error import URLError
 
 import cv2
 import matplotlib
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
-import seaborn as sns
+import seaborn as sn
 import torch
-import yaml
 from PIL import Image, ImageDraw, ImageFont
-from scipy.signal import butter, filtfilt
 
-from utils.general import xywh2xyxy, xyxy2xywh
+from utils import TryExcept, threaded
+from utils.general import (CONFIG_DIR, FONT, LOGGER, check_font, check_requirements, clip_boxes, increment_path,
+                           is_ascii, xywh2xyxy, xyxy2xywh)
 from utils.metrics import fitness
+from utils.segment.general import scale_image
 
 # Settings
+RANK = int(os.getenv('RANK', -1))
 matplotlib.rc('font', **{'size': 11})
 matplotlib.use('Agg')  # for writing to files only
 
 
-def color_list():
-    # Return first 10 plt colors as (r,g,b) https://stackoverflow.com/questions/51350872/python-from-color-name-to-rgb
-    def hex2rgb(h):
+class Colors:
+    # Ultralytics color palette https://ultralytics.com/
+    def __init__(self):
+        # hex = matplotlib.colors.TABLEAU_COLORS.values()
+        hexs = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
+                '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
+        self.palette = [self.hex2rgb(f'#{c}') for c in hexs]
+        self.n = len(self.palette)
+
+    def __call__(self, i, bgr=False):
+        c = self.palette[int(i) % self.n]
+        return (c[2], c[1], c[0]) if bgr else c
+
+    @staticmethod
+    def hex2rgb(h):  # rgb order (PIL)
         return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4))
 
-    return [hex2rgb(h) for h in matplotlib.colors.TABLEAU_COLORS.values()]  # or BASE_ (8), CSS4_ (148), XKCD_ (949)
+
+colors = Colors()  # create instance for 'from utils.plots import colors'
+
+
+def check_pil_font(font=FONT, size=10):
+    # Return a PIL TrueType Font, downloading to CONFIG_DIR if necessary
+    font = Path(font)
+    font = font if font.exists() else (CONFIG_DIR / font.name)
+    try:
+        return ImageFont.truetype(str(font) if font.exists() else font.name, size)
+    except Exception:  # download if missing
+        try:
+            check_font(font)
+            return ImageFont.truetype(str(font), size)
+        except TypeError:
+            check_requirements('Pillow>=8.4.0')  # known issue https://github.com/ultralytics/yolov5/issues/5374
+        except URLError:  # not online
+            return ImageFont.load_default()
+
+
+class Annotator:
+    # YOLOv5 Annotator for train/val mosaics and jpgs and detect/hub inference annotations
+    def __init__(self, im, line_width=None, font_size=None, font='Arial.ttf', pil=False, example='abc'):
+        assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images.'
+        non_ascii = not is_ascii(example)  # non-latin labels, i.e. asian, arabic, cyrillic
+        self.pil = pil or non_ascii
+        if self.pil:  # use PIL
+            self.im = im if isinstance(im, Image.Image) else Image.fromarray(im)
+            self.draw = ImageDraw.Draw(self.im)
+            self.font = check_pil_font(font='Arial.Unicode.ttf' if non_ascii else font,
+                                       size=font_size or max(round(sum(self.im.size) / 2 * 0.035), 12))
+        else:  # use cv2
+            self.im = im
+        self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2)  # line width
+
+    def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
+        # Add one xyxy box to image with label
+        if self.pil or not is_ascii(label):
+            self.draw.rectangle(box, width=self.lw, outline=color)  # box
+            if label:
+                w, h = self.font.getsize(label)  # text width, height (WARNING: deprecated) in 9.2.0
+                # _, _, w, h = self.font.getbbox(label)  # text width, height (New)
+                outside = box[1] - h >= 0  # label fits outside box
+                self.draw.rectangle(
+                    (box[0], box[1] - h if outside else box[1], box[0] + w + 1,
+                     box[1] + 1 if outside else box[1] + h + 1),
+                    fill=color,
+                )
+                # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls')  # for PIL>8.0
+                self.draw.text((box[0], box[1] - h if outside else box[1]), label, fill=txt_color, font=self.font)
+        else:  # cv2
+            p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
+            cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA)
+            if label:
+                tf = max(self.lw - 1, 1)  # font thickness
+                w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0]  # text width, height
+                outside = p1[1] - h >= 3
+                p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
+                cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA)  # filled
+                cv2.putText(self.im,
+                            label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2),
+                            0,
+                            self.lw / 3,
+                            txt_color,
+                            thickness=tf,
+                            lineType=cv2.LINE_AA)
+
+    def masks(self, masks, colors, im_gpu, alpha=0.5, retina_masks=False):
+        """Plot masks at once.
+        Args:
+            masks (tensor): predicted masks on cuda, shape: [n, h, w]
+            colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n]
+            im_gpu (tensor): img is in cuda, shape: [3, h, w], range: [0, 1]
+            alpha (float): mask transparency: 0.0 fully transparent, 1.0 opaque
+        """
+        if self.pil:
+            # convert to numpy first
+            self.im = np.asarray(self.im).copy()
+        if len(masks) == 0:
+            self.im[:] = im_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255
+        colors = torch.tensor(colors, device=im_gpu.device, dtype=torch.float32) / 255.0
+        colors = colors[:, None, None]  # shape(n,1,1,3)
+        masks = masks.unsqueeze(3)  # shape(n,h,w,1)
+        masks_color = masks * (colors * alpha)  # shape(n,h,w,3)
+
+        inv_alph_masks = (1 - masks * alpha).cumprod(0)  # shape(n,h,w,1)
+        mcs = (masks_color * inv_alph_masks).sum(0) * 2  # mask color summand shape(n,h,w,3)
+
+        im_gpu = im_gpu.flip(dims=[0])  # flip channel
+        im_gpu = im_gpu.permute(1, 2, 0).contiguous()  # shape(h,w,3)
+        im_gpu = im_gpu * inv_alph_masks[-1] + mcs
+        im_mask = (im_gpu * 255).byte().cpu().numpy()
+        self.im[:] = im_mask if retina_masks else scale_image(im_gpu.shape, im_mask, self.im.shape)
+        if self.pil:
+            # convert im back to PIL and update draw
+            self.fromarray(self.im)
+
+    def rectangle(self, xy, fill=None, outline=None, width=1):
+        # Add rectangle to image (PIL-only)
+        self.draw.rectangle(xy, fill, outline, width)
+
+    def text(self, xy, text, txt_color=(255, 255, 255), anchor='top'):
+        # Add text to image (PIL-only)
+        if anchor == 'bottom':  # start y from font bottom
+            w, h = self.font.getsize(text)  # text width, height
+            xy[1] += 1 - h
+        self.draw.text(xy, text, fill=txt_color, font=self.font)
+
+    def fromarray(self, im):
+        # Update self.im from a numpy array
+        self.im = im if isinstance(im, Image.Image) else Image.fromarray(im)
+        self.draw = ImageDraw.Draw(self.im)
+
+    def result(self):
+        # Return annotated image as array
+        return np.asarray(self.im)
+
+
+def feature_visualization(x, module_type, stage, n=32, save_dir=Path('runs/detect/exp')):
+    """
+    x:              Features to be visualized
+    module_type:    Module type
+    stage:          Module stage within model
+    n:              Maximum number of feature maps to plot
+    save_dir:       Directory to save results
+    """
+    if 'Detect' not in module_type:
+        batch, channels, height, width = x.shape  # batch, channels, height, width
+        if height > 1 and width > 1:
+            f = save_dir / f"stage{stage}_{module_type.split('.')[-1]}_features.png"  # filename
+
+            blocks = torch.chunk(x[0].cpu(), channels, dim=0)  # select batch index 0, block by channels
+            n = min(n, channels)  # number of plots
+            fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True)  # 8 rows x n/8 cols
+            ax = ax.ravel()
+            plt.subplots_adjust(wspace=0.05, hspace=0.05)
+            for i in range(n):
+                ax[i].imshow(blocks[i].squeeze())  # cmap='gray'
+                ax[i].axis('off')
+
+            LOGGER.info(f'Saving {f}... ({n}/{channels})')
+            plt.savefig(f, dpi=300, bbox_inches='tight')
+            plt.close()
+            np.save(str(f.with_suffix('.npy')), x[0].cpu().numpy())  # npy save
 
 
 def hist2d(x, y, n=100):
@@ -44,6 +204,8 @@ def hist2d(x, y, n=100):
 
 
 def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5):
+    from scipy.signal import butter, filtfilt
+
     # https://stackoverflow.com/questions/28536191/how-to-filter-smooth-with-scipy-numpy
     def butter_lowpass(cutoff, fs, order):
         nyq = 0.5 * fs
@@ -54,142 +216,79 @@ def butter_lowpass(cutoff, fs, order):
     return filtfilt(b, a, data)  # forward-backward filter
 
 
-def plot_one_box(x, im, color=None, label=None, line_thickness=3):
-    # Plots one bounding box on image 'im' using OpenCV
-    assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.'
-    tl = line_thickness or round(0.002 * (im.shape[0] + im.shape[1]) / 2) + 1  # line/font thickness
-    color = color or [random.randint(0, 255) for _ in range(3)]
-    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
-    cv2.rectangle(im, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
-    if label:
-        tf = max(tl - 1, 1)  # font thickness
-        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
-        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
-        cv2.rectangle(im, c1, c2, color, -1, cv2.LINE_AA)  # filled
-        cv2.putText(im, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
-
-
-def plot_one_box_PIL(box, im, color=None, label=None, line_thickness=None):
-    # Plots one bounding box on image 'im' using PIL
-    im = Image.fromarray(im)
-    draw = ImageDraw.Draw(im)
-    line_thickness = line_thickness or max(int(min(im.size) / 200), 2)
-    draw.rectangle(box, width=line_thickness, outline=tuple(color))  # plot
-    if label:
-        fontsize = max(round(max(im.size) / 40), 12)
-        font = ImageFont.truetype("Arial.ttf", fontsize)
-        txt_width, txt_height = font.getsize(label)
-        draw.rectangle([box[0], box[1] - txt_height + 4, box[0] + txt_width, box[1]], fill=tuple(color))
-        draw.text((box[0], box[1] - txt_height + 1), label, fill=(255, 255, 255), font=font)
-    return np.asarray(im)
-
-
-def plot_wh_methods():  # from utils.plots import *; plot_wh_methods()
-    # Compares the two methods for width-height anchor multiplication
-    # https://github.com/ultralytics/yolov3/issues/168
-    x = np.arange(-4.0, 4.0, .1)
-    ya = np.exp(x)
-    yb = torch.sigmoid(torch.from_numpy(x)).numpy() * 2
-
-    fig = plt.figure(figsize=(6, 3), tight_layout=True)
-    plt.plot(x, ya, '.-', label='YOLOv3')
-    plt.plot(x, yb ** 2, '.-', label='YOLOv5 ^2')
-    plt.plot(x, yb ** 1.6, '.-', label='YOLOv5 ^1.6')
-    plt.xlim(left=-4, right=4)
-    plt.ylim(bottom=0, top=6)
-    plt.xlabel('input')
-    plt.ylabel('output')
-    plt.grid()
-    plt.legend()
-    fig.savefig('comparison.png', dpi=200)
-
-
-def output_to_target(output):
-    # Convert model output to target format [batch_id, class_id, x, y, w, h, conf]
+def output_to_target(output, max_det=300):
+    # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] for plotting
     targets = []
     for i, o in enumerate(output):
-        for *box, conf, cls in o.cpu().numpy():
-            targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf])
-    return np.array(targets)
+        box, conf, cls = o[:max_det, :6].cpu().split((4, 1, 1), 1)
+        j = torch.full((conf.shape[0], 1), i)
+        targets.append(torch.cat((j, cls, xyxy2xywh(box), conf), 1))
+    return torch.cat(targets, 0).numpy()
 
 
-def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=640, max_subplots=16):
+@threaded
+def plot_images(images, targets, paths=None, fname='images.jpg', names=None):
     # Plot image grid with labels
-
     if isinstance(images, torch.Tensor):
         images = images.cpu().float().numpy()
     if isinstance(targets, torch.Tensor):
         targets = targets.cpu().numpy()
 
-    # un-normalise
-    if np.max(images[0]) <= 1:
-        images *= 255
-
-    tl = 3  # line thickness
-    tf = max(tl - 1, 1)  # font thickness
+    max_size = 1920  # max image size
+    max_subplots = 16  # max image subplots, i.e. 4x4
     bs, _, h, w = images.shape  # batch size, _, height, width
     bs = min(bs, max_subplots)  # limit plot images
     ns = np.ceil(bs ** 0.5)  # number of subplots (square)
+    if np.max(images[0]) <= 1:
+        images *= 255  # de-normalise (optional)
 
-    # Check if we should resize
-    scale_factor = max_size / max(h, w)
-    if scale_factor < 1:
-        h = math.ceil(scale_factor * h)
-        w = math.ceil(scale_factor * w)
-
-    colors = color_list()  # list of colors
+    # Build Image
     mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8)  # init
-    for i, img in enumerate(images):
+    for i, im in enumerate(images):
         if i == max_subplots:  # if last batch has fewer images than we expect
             break
-
-        block_x = int(w * (i // ns))
-        block_y = int(h * (i % ns))
-
-        img = img.transpose(1, 2, 0)
-        if scale_factor < 1:
-            img = cv2.resize(img, (w, h))
-
-        mosaic[block_y:block_y + h, block_x:block_x + w, :] = img
+        x, y = int(w * (i // ns)), int(h * (i % ns))  # block origin
+        im = im.transpose(1, 2, 0)
+        mosaic[y:y + h, x:x + w, :] = im
+
+    # Resize (optional)
+    scale = max_size / ns / max(h, w)
+    if scale < 1:
+        h = math.ceil(scale * h)
+        w = math.ceil(scale * w)
+        mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h)))
+
+    # Annotate
+    fs = int((h + w) * ns * 0.01)  # font size
+    annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names)
+    for i in range(i + 1):
+        x, y = int(w * (i // ns)), int(h * (i % ns))  # block origin
+        annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2)  # borders
+        if paths:
+            annotator.text((x + 5, y + 5), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220))  # filenames
         if len(targets) > 0:
-            image_targets = targets[targets[:, 0] == i]
-            boxes = xywh2xyxy(image_targets[:, 2:6]).T
-            classes = image_targets[:, 1].astype('int')
-            labels = image_targets.shape[1] == 6  # labels if no conf column
-            conf = None if labels else image_targets[:, 6]  # check for confidence presence (label vs pred)
+            ti = targets[targets[:, 0] == i]  # image targets
+            boxes = xywh2xyxy(ti[:, 2:6]).T
+            classes = ti[:, 1].astype('int')
+            labels = ti.shape[1] == 6  # labels if no conf column
+            conf = None if labels else ti[:, 6]  # check for confidence presence (label vs pred)
 
             if boxes.shape[1]:
                 if boxes.max() <= 1.01:  # if normalized with tolerance 0.01
                     boxes[[0, 2]] *= w  # scale to pixels
                     boxes[[1, 3]] *= h
-                elif scale_factor < 1:  # absolute coords need scale if image scales
-                    boxes *= scale_factor
-            boxes[[0, 2]] += block_x
-            boxes[[1, 3]] += block_y
-            for j, box in enumerate(boxes.T):
-                cls = int(classes[j])
-                color = colors[cls % len(colors)]
+                elif scale < 1:  # absolute coords need scale if image scales
+                    boxes *= scale
+            boxes[[0, 2]] += x
+            boxes[[1, 3]] += y
+            for j, box in enumerate(boxes.T.tolist()):
+                cls = classes[j]
+                color = colors(cls)
                 cls = names[cls] if names else cls
                 if labels or conf[j] > 0.25:  # 0.25 conf thresh
-                    label = '%s' % cls if labels else '%s %.1f' % (cls, conf[j])
-                    plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl)
-
-        # Draw image filename labels
-        if paths:
-            label = Path(paths[i]).name[:40]  # trim to 40 char
-            t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
-            cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0, tl / 3, [220, 220, 220], thickness=tf,
-                        lineType=cv2.LINE_AA)
-
-        # Image border
-        cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), (255, 255, 255), thickness=3)
-
-    if fname:
-        r = min(1280. / max(h, w) / ns, 1.0)  # ratio to limit image size
-        mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA)
-        # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB))  # cv2 save
-        Image.fromarray(mosaic).save(fname)  # PIL save
-    return mosaic
+                    label = f'{cls}' if labels else f'{cls} {conf[j]:.1f}'
+                    annotator.box_label(box, label, color=color)
+    annotator.im.save(fname)  # save
 
 
 def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=''):
@@ -209,9 +308,9 @@ def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=''):
     plt.close()
 
 
-def plot_test_txt():  # from utils.plots import *; plot_test()
-    # Plot test.txt histograms
-    x = np.loadtxt('test.txt', dtype=np.float32)
+def plot_val_txt():  # from utils.plots import *; plot_val()
+    # Plot val.txt histograms
+    x = np.loadtxt('val.txt', dtype=np.float32)
     box = xyxy2xywh(x[:, :4])
     cx, cy = box[:, 0], box[:, 1]
 
@@ -233,76 +332,91 @@ def plot_targets_txt():  # from utils.plots import *; plot_targets_txt()
     fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)
     ax = ax.ravel()
     for i in range(4):
-        ax[i].hist(x[i], bins=100, label='%.3g +/- %.3g' % (x[i].mean(), x[i].std()))
+        ax[i].hist(x[i], bins=100, label=f'{x[i].mean():.3g} +/- {x[i].std():.3g}')
         ax[i].legend()
         ax[i].set_title(s[i])
     plt.savefig('targets.jpg', dpi=200)
 
 
-def plot_study_txt(path='', x=None):  # from utils.plots import *; plot_study_txt()
-    # Plot study.txt generated by test.py
-    fig, ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True)
-    # ax = ax.ravel()
+def plot_val_study(file='', dir='', x=None):  # from utils.plots import *; plot_val_study()
+    # Plot file=study.txt generated by val.py (or plot all study*.txt in dir)
+    save_dir = Path(file).parent if file else Path(dir)
+    plot2 = False  # plot additional results
+    if plot2:
+        ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True)[1].ravel()
 
     fig2, ax2 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True)
-    # for f in [Path(path) / f'study_coco_{x}.txt' for x in ['yolov5s6', 'yolov5m6', 'yolov5l6', 'yolov5x6']]:
-    for f in sorted(Path(path).glob('study*.txt')):
+    # for f in [save_dir / f'study_coco_{x}.txt' for x in ['yolov5n6', 'yolov5s6', 'yolov5m6', 'yolov5l6', 'yolov5x6']]:
+    for f in sorted(save_dir.glob('study*.txt')):
         y = np.loadtxt(f, dtype=np.float32, usecols=[0, 1, 2, 3, 7, 8, 9], ndmin=2).T
         x = np.arange(y.shape[1]) if x is None else np.array(x)
-        s = ['P', 'R', 'mAP@.5', 'mAP@.5:.95', 't_inference (ms/img)', 't_NMS (ms/img)', 't_total (ms/img)']
-        # for i in range(7):
-        #     ax[i].plot(x, y[i], '.-', linewidth=2, markersize=8)
-        #     ax[i].set_title(s[i])
+        if plot2:
+            s = ['P', 'R', 'mAP@.5', 'mAP@.5:.95', 't_preprocess (ms/img)', 't_inference (ms/img)', 't_NMS (ms/img)']
+            for i in range(7):
+                ax[i].plot(x, y[i], '.-', linewidth=2, markersize=8)
+                ax[i].set_title(s[i])
 
         j = y[3].argmax() + 1
-        ax2.plot(y[6, 1:j], y[3, 1:j] * 1E2, '.-', linewidth=2, markersize=8,
+        ax2.plot(y[5, 1:j],
+                 y[3, 1:j] * 1E2,
+                 '.-',
+                 linewidth=2,
+                 markersize=8,
                  label=f.stem.replace('study_coco_', '').replace('yolo', 'YOLO'))
 
     ax2.plot(1E3 / np.array([209, 140, 97, 58, 35, 18]), [34.6, 40.5, 43.0, 47.5, 49.7, 51.5],
-             'k.-', linewidth=2, markersize=8, alpha=.25, label='EfficientDet')
+             'k.-',
+             linewidth=2,
+             markersize=8,
+             alpha=.25,
+             label='EfficientDet')
 
     ax2.grid(alpha=0.2)
     ax2.set_yticks(np.arange(20, 60, 5))
     ax2.set_xlim(0, 57)
-    ax2.set_ylim(30, 55)
+    ax2.set_ylim(25, 55)
     ax2.set_xlabel('GPU Speed (ms/img)')
     ax2.set_ylabel('COCO AP val')
     ax2.legend(loc='lower right')
-    plt.savefig(str(Path(path).name) + '.png', dpi=300)
+    f = save_dir / 'study.png'
+    print(f'Saving {f}...')
+    plt.savefig(f, dpi=300)
 
 
-def plot_labels(labels, names=(), save_dir=Path(''), loggers=None):
+@TryExcept()  # known issue https://github.com/ultralytics/yolov5/issues/5395
+def plot_labels(labels, names=(), save_dir=Path('')):
     # plot dataset labels
-    print('Plotting labels... ')
+    LOGGER.info(f"Plotting labels to {save_dir / 'labels.jpg'}... ")
     c, b = labels[:, 0], labels[:, 1:].transpose()  # classes, boxes
     nc = int(c.max() + 1)  # number of classes
-    colors = color_list()
     x = pd.DataFrame(b.transpose(), columns=['x', 'y', 'width', 'height'])
 
     # seaborn correlogram
-    sns.pairplot(x, corner=True, diag_kind='auto', kind='hist', diag_kws=dict(bins=50), plot_kws=dict(pmax=0.9))
+    sn.pairplot(x, corner=True, diag_kind='auto', kind='hist', diag_kws=dict(bins=50), plot_kws=dict(pmax=0.9))
     plt.savefig(save_dir / 'labels_correlogram.jpg', dpi=200)
     plt.close()
 
     # matplotlib labels
     matplotlib.use('svg')  # faster
     ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)[1].ravel()
-    ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8)
+    y = ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8)
+    with contextlib.suppress(Exception):  # color histogram bars by class
+        [y[2].patches[i].set_color([x / 255 for x in colors(i)]) for i in range(nc)]  # known issue #3195
     ax[0].set_ylabel('instances')
     if 0 < len(names) < 30:
         ax[0].set_xticks(range(len(names)))
-        ax[0].set_xticklabels(names, rotation=90, fontsize=10)
+        ax[0].set_xticklabels(list(names.values()), rotation=90, fontsize=10)
     else:
         ax[0].set_xlabel('classes')
-    sns.histplot(x, x='x', y='y', ax=ax[2], bins=50, pmax=0.9)
-    sns.histplot(x, x='width', y='height', ax=ax[3], bins=50, pmax=0.9)
+    sn.histplot(x, x='x', y='y', ax=ax[2], bins=50, pmax=0.9)
+    sn.histplot(x, x='width', y='height', ax=ax[3], bins=50, pmax=0.9)
 
     # rectangles
     labels[:, 1:3] = 0.5  # center
     labels[:, 1:] = xywh2xyxy(labels[:, 1:]) * 2000
     img = Image.fromarray(np.ones((2000, 2000, 3), dtype=np.uint8) * 255)
     for cls, *box in labels[:1000]:
-        ImageDraw.Draw(img).rectangle(box, width=1, outline=colors[int(cls) % 10])  # plot
+        ImageDraw.Draw(img).rectangle(box, width=1, outline=colors(cls))  # plot
     ax[1].imshow(img)
     ax[1].axis('off')
 
@@ -314,34 +428,87 @@ def plot_labels(labels, names=(), save_dir=Path(''), loggers=None):
     matplotlib.use('Agg')
     plt.close()
 
-    # loggers
-    for k, v in loggers.items() or {}:
-        if k == 'wandb' and v:
-            v.log({"Labels": [v.Image(str(x), caption=x.name) for x in save_dir.glob('*labels*.jpg')]}, commit=False)
 
-
-def plot_evolution(yaml_file='data/hyp.finetune.yaml'):  # from utils.plots import *; plot_evolution()
-    # Plot hyperparameter evolution results in evolve.txt
-    with open(yaml_file) as f:
-        hyp = yaml.safe_load(f)
-    x = np.loadtxt('evolve.txt', ndmin=2)
+def imshow_cls(im, labels=None, pred=None, names=None, nmax=25, verbose=False, f=Path('images.jpg')):
+    # Show classification image grid with labels (optional) and predictions (optional)
+    from utils.augmentations import denormalize
+
+    names = names or [f'class{i}' for i in range(1000)]
+    blocks = torch.chunk(denormalize(im.clone()).cpu().float(), len(im),
+                         dim=0)  # select batch index 0, block by channels
+    n = min(len(blocks), nmax)  # number of plots
+    m = min(8, round(n ** 0.5))  # 8 x 8 default
+    fig, ax = plt.subplots(math.ceil(n / m), m)  # 8 rows x n/8 cols
+    ax = ax.ravel() if m > 1 else [ax]
+    # plt.subplots_adjust(wspace=0.05, hspace=0.05)
+    for i in range(n):
+        ax[i].imshow(blocks[i].squeeze().permute((1, 2, 0)).numpy().clip(0.0, 1.0))
+        ax[i].axis('off')
+        if labels is not None:
+            s = names[labels[i]] + (f'—{names[pred[i]]}' if pred is not None else '')
+            ax[i].set_title(s, fontsize=8, verticalalignment='top')
+    plt.savefig(f, dpi=300, bbox_inches='tight')
+    plt.close()
+    if verbose:
+        LOGGER.info(f'Saving {f}')
+        if labels is not None:
+            LOGGER.info('True:     ' + ' '.join(f'{names[i]:3s}' for i in labels[:nmax]))
+        if pred is not None:
+            LOGGER.info('Predicted:' + ' '.join(f'{names[i]:3s}' for i in pred[:nmax]))
+    return f
+
+
+def plot_evolve(evolve_csv='path/to/evolve.csv'):  # from utils.plots import *; plot_evolve()
+    # Plot evolve.csv hyp evolution results
+    evolve_csv = Path(evolve_csv)
+    data = pd.read_csv(evolve_csv)
+    keys = [x.strip() for x in data.columns]
+    x = data.values
     f = fitness(x)
-    # weights = (f - f.min()) ** 2  # for weighted results
+    j = np.argmax(f)  # max fitness index
     plt.figure(figsize=(10, 12), tight_layout=True)
     matplotlib.rc('font', **{'size': 8})
-    for i, (k, v) in enumerate(hyp.items()):
-        y = x[:, i + 7]
-        # mu = (y * weights).sum() / weights.sum()  # best weighted result
-        mu = y[f.argmax()]  # best single result
+    print(f'Best results from row {j} of {evolve_csv}:')
+    for i, k in enumerate(keys[7:]):
+        v = x[:, 7 + i]
+        mu = v[j]  # best single result
         plt.subplot(6, 5, i + 1)
-        plt.scatter(y, f, c=hist2d(y, f, 20), cmap='viridis', alpha=.8, edgecolors='none')
+        plt.scatter(v, f, c=hist2d(v, f, 20), cmap='viridis', alpha=.8, edgecolors='none')
         plt.plot(mu, f.max(), 'k+', markersize=15)
-        plt.title('%s = %.3g' % (k, mu), fontdict={'size': 9})  # limit to 40 characters
+        plt.title(f'{k} = {mu:.3g}', fontdict={'size': 9})  # limit to 40 characters
         if i % 5 != 0:
             plt.yticks([])
-        print('%15s: %.3g' % (k, mu))
-    plt.savefig('evolve.png', dpi=200)
-    print('\nPlot saved as evolve.png')
+        print(f'{k:>15}: {mu:.3g}')
+    f = evolve_csv.with_suffix('.png')  # filename
+    plt.savefig(f, dpi=200)
+    plt.close()
+    print(f'Saved {f}')
+
+
+def plot_results(file='path/to/results.csv', dir=''):
+    # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv')
+    save_dir = Path(file).parent if file else Path(dir)
+    fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True)
+    ax = ax.ravel()
+    files = list(save_dir.glob('results*.csv'))
+    assert len(files), f'No results.csv files found in {save_dir.resolve()}, nothing to plot.'
+    for f in files:
+        try:
+            data = pd.read_csv(f)
+            s = [x.strip() for x in data.columns]
+            x = data.values[:, 0]
+            for i, j in enumerate([1, 2, 3, 4, 5, 8, 9, 10, 6, 7]):
+                y = data.values[:, j].astype('float')
+                # y[y == 0] = np.nan  # don't show zero values
+                ax[i].plot(x, y, marker='.', label=f.stem, linewidth=2, markersize=8)
+                ax[i].set_title(s[j], fontsize=12)
+                # if j in [8, 9, 10]:  # share train and val loss y axes
+                #     ax[i].get_shared_y_axes().join(ax[i], ax[i - 5])
+        except Exception as e:
+            LOGGER.info(f'Warning: Plotting error for {f}: {e}')
+    ax[1].legend()
+    fig.savefig(save_dir / 'results.png', dpi=200)
+    plt.close()
 
 
 def profile_idetection(start=0, stop=0, labels=(), save_dir=''):
@@ -370,66 +537,24 @@ def profile_idetection(start=0, stop=0, labels=(), save_dir=''):
                 else:
                     a.remove()
         except Exception as e:
-            print('Warning: Plotting error for %s; %s' % (f, e))
-
+            print(f'Warning: Plotting error for {f}; {e}')
     ax[1].legend()
     plt.savefig(Path(save_dir) / 'idetection_profile.png', dpi=200)
 
 
-def plot_results_overlay(start=0, stop=0):  # from utils.plots import *; plot_results_overlay()
-    # Plot training 'results*.txt', overlaying train and val losses
-    s = ['train', 'train', 'train', 'Precision', 'mAP@0.5', 'val', 'val', 'val', 'Recall', 'mAP@0.5:0.95']  # legends
-    t = ['Box', 'Objectness', 'Classification', 'P-R', 'mAP-F1']  # titles
-    for f in sorted(glob.glob('results*.txt') + glob.glob('../../Downloads/results*.txt')):
-        results = np.loadtxt(f, usecols=[2, 3, 4, 8, 9, 12, 13, 14, 10, 11], ndmin=2).T
-        n = results.shape[1]  # number of rows
-        x = range(start, min(stop, n) if stop else n)
-        fig, ax = plt.subplots(1, 5, figsize=(14, 3.5), tight_layout=True)
-        ax = ax.ravel()
-        for i in range(5):
-            for j in [i, i + 5]:
-                y = results[j, x]
-                ax[i].plot(x, y, marker='.', label=s[j])
-                # y_smooth = butter_lowpass_filtfilt(y)
-                # ax[i].plot(x, np.gradient(y_smooth), marker='.', label=s[j])
-
-            ax[i].set_title(t[i])
-            ax[i].legend()
-            ax[i].set_ylabel(f) if i == 0 else None  # add filename
-        fig.savefig(f.replace('.txt', '.png'), dpi=200)
-
-
-def plot_results(start=0, stop=0, bucket='', id=(), labels=(), save_dir=''):
-    # Plot training 'results*.txt'. from utils.plots import *; plot_results(save_dir='runs/train/exp')
-    fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True)
-    ax = ax.ravel()
-    s = ['Box', 'Objectness', 'Classification', 'Precision', 'Recall',
-         'val Box', 'val Objectness', 'val Classification', 'mAP@0.5', 'mAP@0.5:0.95']
-    if bucket:
-        # files = ['https://storage.googleapis.com/%s/results%g.txt' % (bucket, x) for x in id]
-        files = ['results%g.txt' % x for x in id]
-        c = ('gsutil cp ' + '%s ' * len(files) + '.') % tuple('gs://%s/results%g.txt' % (bucket, x) for x in id)
-        os.system(c)
-    else:
-        files = list(Path(save_dir).glob('results*.txt'))
-    assert len(files), 'No results.txt files found in %s, nothing to plot.' % os.path.abspath(save_dir)
-    for fi, f in enumerate(files):
-        try:
-            results = np.loadtxt(f, usecols=[2, 3, 4, 8, 9, 12, 13, 14, 10, 11], ndmin=2).T
-            n = results.shape[1]  # number of rows
-            x = range(start, min(stop, n) if stop else n)
-            for i in range(10):
-                y = results[i, x]
-                if i in [0, 1, 2, 5, 6, 7]:
-                    y[y == 0] = np.nan  # don't show zero loss values
-                    # y /= y[0]  # normalize
-                label = labels[fi] if len(labels) else f.stem
-                ax[i].plot(x, y, marker='.', label=label, linewidth=2, markersize=8)
-                ax[i].set_title(s[i])
-                # if i in [5, 6, 7]:  # share train and val loss y axes
-                #     ax[i].get_shared_y_axes().join(ax[i], ax[i - 5])
-        except Exception as e:
-            print('Warning: Plotting error for %s; %s' % (f, e))
-
-    ax[1].legend()
-    fig.savefig(Path(save_dir) / 'results.png', dpi=200)
+def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False, BGR=False, save=True):
+    # Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop
+    xyxy = torch.tensor(xyxy).view(-1, 4)
+    b = xyxy2xywh(xyxy)  # boxes
+    if square:
+        b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1)  # attempt rectangle to square
+    b[:, 2:] = b[:, 2:] * gain + pad  # box wh * gain + pad
+    xyxy = xywh2xyxy(b).long()
+    clip_boxes(xyxy, im.shape)
+    crop = im[int(xyxy[0, 1]):int(xyxy[0, 3]), int(xyxy[0, 0]):int(xyxy[0, 2]), ::(1 if BGR else -1)]
+    if save:
+        file.parent.mkdir(parents=True, exist_ok=True)  # make directory
+        f = str(increment_path(file).with_suffix('.jpg'))
+        # cv2.imwrite(f, crop)  # save BGR, https://github.com/ultralytics/yolov5/issues/7007 chroma subsampling issue
+        Image.fromarray(crop[..., ::-1]).save(f, quality=95, subsampling=0)  # save RGB
+    return crop
diff --git a/utils/segment/__init__.py b/utils/segment/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/utils/segment/augmentations.py b/utils/segment/augmentations.py
new file mode 100644
index 000000000000..169addedf0f5
--- /dev/null
+++ b/utils/segment/augmentations.py
@@ -0,0 +1,104 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Image augmentation functions
+"""
+
+import math
+import random
+
+import cv2
+import numpy as np
+
+from ..augmentations import box_candidates
+from ..general import resample_segments, segment2box
+
+
+def mixup(im, labels, segments, im2, labels2, segments2):
+    # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf
+    r = np.random.beta(32.0, 32.0)  # mixup ratio, alpha=beta=32.0
+    im = (im * r + im2 * (1 - r)).astype(np.uint8)
+    labels = np.concatenate((labels, labels2), 0)
+    segments = np.concatenate((segments, segments2), 0)
+    return im, labels, segments
+
+
+def random_perspective(im,
+                       targets=(),
+                       segments=(),
+                       degrees=10,
+                       translate=.1,
+                       scale=.1,
+                       shear=10,
+                       perspective=0.0,
+                       border=(0, 0)):
+    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
+    # targets = [cls, xyxy]
+
+    height = im.shape[0] + border[0] * 2  # shape(h,w,c)
+    width = im.shape[1] + border[1] * 2
+
+    # Center
+    C = np.eye(3)
+    C[0, 2] = -im.shape[1] / 2  # x translation (pixels)
+    C[1, 2] = -im.shape[0] / 2  # y translation (pixels)
+
+    # Perspective
+    P = np.eye(3)
+    P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
+    P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)
+
+    # Rotation and Scale
+    R = np.eye(3)
+    a = random.uniform(-degrees, degrees)
+    # a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
+    s = random.uniform(1 - scale, 1 + scale)
+    # s = 2 ** random.uniform(-scale, scale)
+    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
+
+    # Shear
+    S = np.eye(3)
+    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
+    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)
+
+    # Translation
+    T = np.eye(3)
+    T[0, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * width)  # x translation (pixels)
+    T[1, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * height)  # y translation (pixels)
+
+    # Combined rotation matrix
+    M = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT
+    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
+        if perspective:
+            im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114))
+        else:  # affine
+            im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
+
+    # Visualize
+    # import matplotlib.pyplot as plt
+    # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
+    # ax[0].imshow(im[:, :, ::-1])  # base
+    # ax[1].imshow(im2[:, :, ::-1])  # warped
+
+    # Transform label coordinates
+    n = len(targets)
+    new_segments = []
+    if n:
+        new = np.zeros((n, 4))
+        segments = resample_segments(segments)  # upsample
+        for i, segment in enumerate(segments):
+            xy = np.ones((len(segment), 3))
+            xy[:, :2] = segment
+            xy = xy @ M.T  # transform
+            xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2])  # perspective rescale or affine
+
+            # clip
+            new[i] = segment2box(xy, width, height)
+            new_segments.append(xy)
+
+        # filter candidates
+        i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01)
+        targets = targets[i]
+        targets[:, 1:5] = new[i]
+        new_segments = np.array(new_segments)[i]
+
+    return im, targets, new_segments
diff --git a/utils/segment/dataloaders.py b/utils/segment/dataloaders.py
new file mode 100644
index 000000000000..097a5d5cb058
--- /dev/null
+++ b/utils/segment/dataloaders.py
@@ -0,0 +1,332 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Dataloaders
+"""
+
+import os
+import random
+
+import cv2
+import numpy as np
+import torch
+from torch.utils.data import DataLoader, distributed
+
+from ..augmentations import augment_hsv, copy_paste, letterbox
+from ..dataloaders import InfiniteDataLoader, LoadImagesAndLabels, seed_worker
+from ..general import LOGGER, xyn2xy, xywhn2xyxy, xyxy2xywhn
+from ..torch_utils import torch_distributed_zero_first
+from .augmentations import mixup, random_perspective
+
+RANK = int(os.getenv('RANK', -1))
+
+
+def create_dataloader(path,
+                      imgsz,
+                      batch_size,
+                      stride,
+                      single_cls=False,
+                      hyp=None,
+                      augment=False,
+                      cache=False,
+                      pad=0.0,
+                      rect=False,
+                      rank=-1,
+                      workers=8,
+                      image_weights=False,
+                      quad=False,
+                      prefix='',
+                      shuffle=False,
+                      mask_downsample_ratio=1,
+                      overlap_mask=False,
+                      seed=0):
+    if rect and shuffle:
+        LOGGER.warning('WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False')
+        shuffle = False
+    with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
+        dataset = LoadImagesAndLabelsAndMasks(
+            path,
+            imgsz,
+            batch_size,
+            augment=augment,  # augmentation
+            hyp=hyp,  # hyperparameters
+            rect=rect,  # rectangular batches
+            cache_images=cache,
+            single_cls=single_cls,
+            stride=int(stride),
+            pad=pad,
+            image_weights=image_weights,
+            prefix=prefix,
+            downsample_ratio=mask_downsample_ratio,
+            overlap=overlap_mask)
+
+    batch_size = min(batch_size, len(dataset))
+    nd = torch.cuda.device_count()  # number of CUDA devices
+    nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers])  # number of workers
+    sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
+    loader = DataLoader if image_weights else InfiniteDataLoader  # only DataLoader allows for attribute updates
+    generator = torch.Generator()
+    generator.manual_seed(6148914691236517205 + seed + RANK)
+    return loader(
+        dataset,
+        batch_size=batch_size,
+        shuffle=shuffle and sampler is None,
+        num_workers=nw,
+        sampler=sampler,
+        pin_memory=True,
+        collate_fn=LoadImagesAndLabelsAndMasks.collate_fn4 if quad else LoadImagesAndLabelsAndMasks.collate_fn,
+        worker_init_fn=seed_worker,
+        generator=generator,
+    ), dataset
+
+
+class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels):  # for training/testing
+
+    def __init__(
+        self,
+        path,
+        img_size=640,
+        batch_size=16,
+        augment=False,
+        hyp=None,
+        rect=False,
+        image_weights=False,
+        cache_images=False,
+        single_cls=False,
+        stride=32,
+        pad=0,
+        min_items=0,
+        prefix='',
+        downsample_ratio=1,
+        overlap=False,
+    ):
+        super().__init__(path, img_size, batch_size, augment, hyp, rect, image_weights, cache_images, single_cls,
+                         stride, pad, min_items, prefix)
+        self.downsample_ratio = downsample_ratio
+        self.overlap = overlap
+
+    def __getitem__(self, index):
+        index = self.indices[index]  # linear, shuffled, or image_weights
+
+        hyp = self.hyp
+        mosaic = self.mosaic and random.random() < hyp['mosaic']
+        masks = []
+        if mosaic:
+            # Load mosaic
+            img, labels, segments = self.load_mosaic(index)
+            shapes = None
+
+            # MixUp augmentation
+            if random.random() < hyp['mixup']:
+                img, labels, segments = mixup(img, labels, segments, *self.load_mosaic(random.randint(0, self.n - 1)))
+
+        else:
+            # Load image
+            img, (h0, w0), (h, w) = self.load_image(index)
+
+            # Letterbox
+            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
+            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
+            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling
+
+            labels = self.labels[index].copy()
+            # [array, array, ....], array.shape=(num_points, 2), xyxyxyxy
+            segments = self.segments[index].copy()
+            if len(segments):
+                for i_s in range(len(segments)):
+                    segments[i_s] = xyn2xy(
+                        segments[i_s],
+                        ratio[0] * w,
+                        ratio[1] * h,
+                        padw=pad[0],
+                        padh=pad[1],
+                    )
+            if labels.size:  # normalized xywh to pixel xyxy format
+                labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])
+
+            if self.augment:
+                img, labels, segments = random_perspective(img,
+                                                           labels,
+                                                           segments=segments,
+                                                           degrees=hyp['degrees'],
+                                                           translate=hyp['translate'],
+                                                           scale=hyp['scale'],
+                                                           shear=hyp['shear'],
+                                                           perspective=hyp['perspective'])
+
+        nl = len(labels)  # number of labels
+        if nl:
+            labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3)
+            if self.overlap:
+                masks, sorted_idx = polygons2masks_overlap(img.shape[:2],
+                                                           segments,
+                                                           downsample_ratio=self.downsample_ratio)
+                masks = masks[None]  # (640, 640) -> (1, 640, 640)
+                labels = labels[sorted_idx]
+            else:
+                masks = polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio)
+
+        masks = (torch.from_numpy(masks) if len(masks) else torch.zeros(1 if self.overlap else nl, img.shape[0] //
+                                                                        self.downsample_ratio, img.shape[1] //
+                                                                        self.downsample_ratio))
+        # TODO: albumentations support
+        if self.augment:
+            # Albumentations
+            # there are some augmentation that won't change boxes and masks,
+            # so just be it for now.
+            img, labels = self.albumentations(img, labels)
+            nl = len(labels)  # update after albumentations
+
+            # HSV color-space
+            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])
+
+            # Flip up-down
+            if random.random() < hyp['flipud']:
+                img = np.flipud(img)
+                if nl:
+                    labels[:, 2] = 1 - labels[:, 2]
+                    masks = torch.flip(masks, dims=[1])
+
+            # Flip left-right
+            if random.random() < hyp['fliplr']:
+                img = np.fliplr(img)
+                if nl:
+                    labels[:, 1] = 1 - labels[:, 1]
+                    masks = torch.flip(masks, dims=[2])
+
+            # Cutouts  # labels = cutout(img, labels, p=0.5)
+
+        labels_out = torch.zeros((nl, 6))
+        if nl:
+            labels_out[:, 1:] = torch.from_numpy(labels)
+
+        # Convert
+        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
+        img = np.ascontiguousarray(img)
+
+        return (torch.from_numpy(img), labels_out, self.im_files[index], shapes, masks)
+
+    def load_mosaic(self, index):
+        # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic
+        labels4, segments4 = [], []
+        s = self.img_size
+        yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border)  # mosaic center x, y
+
+        # 3 additional image indices
+        indices = [index] + random.choices(self.indices, k=3)  # 3 additional image indices
+        for i, index in enumerate(indices):
+            # Load image
+            img, _, (h, w) = self.load_image(index)
+
+            # place img in img4
+            if i == 0:  # top left
+                img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
+                x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
+                x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
+            elif i == 1:  # top right
+                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
+                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
+            elif i == 2:  # bottom left
+                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
+                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
+            elif i == 3:  # bottom right
+                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
+                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
+
+            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
+            padw = x1a - x1b
+            padh = y1a - y1b
+
+            labels, segments = self.labels[index].copy(), self.segments[index].copy()
+
+            if labels.size:
+                labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh)  # normalized xywh to pixel xyxy format
+                segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
+            labels4.append(labels)
+            segments4.extend(segments)
+
+        # Concat/clip labels
+        labels4 = np.concatenate(labels4, 0)
+        for x in (labels4[:, 1:], *segments4):
+            np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
+        # img4, labels4 = replicate(img4, labels4)  # replicate
+
+        # Augment
+        img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp['copy_paste'])
+        img4, labels4, segments4 = random_perspective(img4,
+                                                      labels4,
+                                                      segments4,
+                                                      degrees=self.hyp['degrees'],
+                                                      translate=self.hyp['translate'],
+                                                      scale=self.hyp['scale'],
+                                                      shear=self.hyp['shear'],
+                                                      perspective=self.hyp['perspective'],
+                                                      border=self.mosaic_border)  # border to remove
+        return img4, labels4, segments4
+
+    @staticmethod
+    def collate_fn(batch):
+        img, label, path, shapes, masks = zip(*batch)  # transposed
+        batched_masks = torch.cat(masks, 0)
+        for i, l in enumerate(label):
+            l[:, 0] = i  # add target image index for build_targets()
+        return torch.stack(img, 0), torch.cat(label, 0), path, shapes, batched_masks
+
+
+def polygon2mask(img_size, polygons, color=1, downsample_ratio=1):
+    """
+    Args:
+        img_size (tuple): The image size.
+        polygons (np.ndarray): [N, M], N is the number of polygons,
+            M is the number of points(Be divided by 2).
+    """
+    mask = np.zeros(img_size, dtype=np.uint8)
+    polygons = np.asarray(polygons)
+    polygons = polygons.astype(np.int32)
+    shape = polygons.shape
+    polygons = polygons.reshape(shape[0], -1, 2)
+    cv2.fillPoly(mask, polygons, color=color)
+    nh, nw = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio)
+    # NOTE: fillPoly firstly then resize is trying the keep the same way
+    # of loss calculation when mask-ratio=1.
+    mask = cv2.resize(mask, (nw, nh))
+    return mask
+
+
+def polygons2masks(img_size, polygons, color, downsample_ratio=1):
+    """
+    Args:
+        img_size (tuple): The image size.
+        polygons (list[np.ndarray]): each polygon is [N, M],
+            N is the number of polygons,
+            M is the number of points(Be divided by 2).
+    """
+    masks = []
+    for si in range(len(polygons)):
+        mask = polygon2mask(img_size, [polygons[si].reshape(-1)], color, downsample_ratio)
+        masks.append(mask)
+    return np.array(masks)
+
+
+def polygons2masks_overlap(img_size, segments, downsample_ratio=1):
+    """Return a (640, 640) overlap mask."""
+    masks = np.zeros((img_size[0] // downsample_ratio, img_size[1] // downsample_ratio),
+                     dtype=np.int32 if len(segments) > 255 else np.uint8)
+    areas = []
+    ms = []
+    for si in range(len(segments)):
+        mask = polygon2mask(
+            img_size,
+            [segments[si].reshape(-1)],
+            downsample_ratio=downsample_ratio,
+            color=1,
+        )
+        ms.append(mask)
+        areas.append(mask.sum())
+    areas = np.asarray(areas)
+    index = np.argsort(-areas)
+    ms = np.array(ms)[index]
+    for i in range(len(segments)):
+        mask = ms[i] * (i + 1)
+        masks = masks + mask
+        masks = np.clip(masks, a_min=0, a_max=i + 1)
+    return masks, index
diff --git a/utils/segment/general.py b/utils/segment/general.py
new file mode 100644
index 000000000000..f1b2f1dd120f
--- /dev/null
+++ b/utils/segment/general.py
@@ -0,0 +1,160 @@
+import cv2
+import numpy as np
+import torch
+import torch.nn.functional as F
+
+
+def crop_mask(masks, boxes):
+    """
+    "Crop" predicted masks by zeroing out everything not in the predicted bbox.
+    Vectorized by Chong (thanks Chong).
+
+    Args:
+        - masks should be a size [n, h, w] tensor of masks
+        - boxes should be a size [n, 4] tensor of bbox coords in relative point form
+    """
+
+    n, h, w = masks.shape
+    x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)  # x1 shape(1,1,n)
+    r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :]  # rows shape(1,w,1)
+    c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None]  # cols shape(h,1,1)
+
+    return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
+
+
+def process_mask_upsample(protos, masks_in, bboxes, shape):
+    """
+    Crop after upsample.
+    protos: [mask_dim, mask_h, mask_w]
+    masks_in: [n, mask_dim], n is number of masks after nms
+    bboxes: [n, 4], n is number of masks after nms
+    shape: input_image_size, (h, w)
+
+    return: h, w, n
+    """
+
+    c, mh, mw = protos.shape  # CHW
+    masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)
+    masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0]  # CHW
+    masks = crop_mask(masks, bboxes)  # CHW
+    return masks.gt_(0.5)
+
+
+def process_mask(protos, masks_in, bboxes, shape, upsample=False):
+    """
+    Crop before upsample.
+    proto_out: [mask_dim, mask_h, mask_w]
+    out_masks: [n, mask_dim], n is number of masks after nms
+    bboxes: [n, 4], n is number of masks after nms
+    shape:input_image_size, (h, w)
+
+    return: h, w, n
+    """
+
+    c, mh, mw = protos.shape  # CHW
+    ih, iw = shape
+    masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)  # CHW
+
+    downsampled_bboxes = bboxes.clone()
+    downsampled_bboxes[:, 0] *= mw / iw
+    downsampled_bboxes[:, 2] *= mw / iw
+    downsampled_bboxes[:, 3] *= mh / ih
+    downsampled_bboxes[:, 1] *= mh / ih
+
+    masks = crop_mask(masks, downsampled_bboxes)  # CHW
+    if upsample:
+        masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0]  # CHW
+    return masks.gt_(0.5)
+
+
+def process_mask_native(protos, masks_in, bboxes, shape):
+    """
+    Crop after upsample.
+    protos: [mask_dim, mask_h, mask_w]
+    masks_in: [n, mask_dim], n is number of masks after nms
+    bboxes: [n, 4], n is number of masks after nms
+    shape: input_image_size, (h, w)
+
+    return: h, w, n
+    """
+    c, mh, mw = protos.shape  # CHW
+    masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)
+    gain = min(mh / shape[0], mw / shape[1])  # gain  = old / new
+    pad = (mw - shape[1] * gain) / 2, (mh - shape[0] * gain) / 2  # wh padding
+    top, left = int(pad[1]), int(pad[0])  # y, x
+    bottom, right = int(mh - pad[1]), int(mw - pad[0])
+    masks = masks[:, top:bottom, left:right]
+
+    masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0]  # CHW
+    masks = crop_mask(masks, bboxes)  # CHW
+    return masks.gt_(0.5)
+
+
+def scale_image(im1_shape, masks, im0_shape, ratio_pad=None):
+    """
+    img1_shape: model input shape, [h, w]
+    img0_shape: origin pic shape, [h, w, 3]
+    masks: [h, w, num]
+    """
+    # Rescale coordinates (xyxy) from im1_shape to im0_shape
+    if ratio_pad is None:  # calculate from im0_shape
+        gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1])  # gain  = old / new
+        pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2  # wh padding
+    else:
+        pad = ratio_pad[1]
+    top, left = int(pad[1]), int(pad[0])  # y, x
+    bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0])
+
+    if len(masks.shape) < 2:
+        raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')
+    masks = masks[top:bottom, left:right]
+    # masks = masks.permute(2, 0, 1).contiguous()
+    # masks = F.interpolate(masks[None], im0_shape[:2], mode='bilinear', align_corners=False)[0]
+    # masks = masks.permute(1, 2, 0).contiguous()
+    masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]))
+
+    if len(masks.shape) == 2:
+        masks = masks[:, :, None]
+    return masks
+
+
+def mask_iou(mask1, mask2, eps=1e-7):
+    """
+    mask1: [N, n] m1 means number of predicted objects
+    mask2: [M, n] m2 means number of gt objects
+    Note: n means image_w x image_h
+
+    return: masks iou, [N, M]
+    """
+    intersection = torch.matmul(mask1, mask2.t()).clamp(0)
+    union = (mask1.sum(1)[:, None] + mask2.sum(1)[None]) - intersection  # (area1 + area2) - intersection
+    return intersection / (union + eps)
+
+
+def masks_iou(mask1, mask2, eps=1e-7):
+    """
+    mask1: [N, n] m1 means number of predicted objects
+    mask2: [N, n] m2 means number of gt objects
+    Note: n means image_w x image_h
+
+    return: masks iou, (N, )
+    """
+    intersection = (mask1 * mask2).sum(1).clamp(0)  # (N, )
+    union = (mask1.sum(1) + mask2.sum(1))[None] - intersection  # (area1 + area2) - intersection
+    return intersection / (union + eps)
+
+
+def masks2segments(masks, strategy='largest'):
+    # Convert masks(n,160,160) into segments(n,xy)
+    segments = []
+    for x in masks.int().cpu().numpy().astype('uint8'):
+        c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
+        if c:
+            if strategy == 'concat':  # concatenate all segments
+                c = np.concatenate([x.reshape(-1, 2) for x in c])
+            elif strategy == 'largest':  # select largest segment
+                c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2)
+        else:
+            c = np.zeros((0, 2))  # no segments found
+        segments.append(c.astype('float32'))
+    return segments
diff --git a/utils/segment/loss.py b/utils/segment/loss.py
new file mode 100644
index 000000000000..caeff3cad586
--- /dev/null
+++ b/utils/segment/loss.py
@@ -0,0 +1,185 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ..general import xywh2xyxy
+from ..loss import FocalLoss, smooth_BCE
+from ..metrics import bbox_iou
+from ..torch_utils import de_parallel
+from .general import crop_mask
+
+
+class ComputeLoss:
+    # Compute losses
+    def __init__(self, model, autobalance=False, overlap=False):
+        self.sort_obj_iou = False
+        self.overlap = overlap
+        device = next(model.parameters()).device  # get model device
+        h = model.hyp  # hyperparameters
+
+        # Define criteria
+        BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device))
+        BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device))
+
+        # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
+        self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0))  # positive, negative BCE targets
+
+        # Focal loss
+        g = h['fl_gamma']  # focal loss gamma
+        if g > 0:
+            BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)
+
+        m = de_parallel(model).model[-1]  # Detect() module
+        self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02])  # P3-P7
+        self.ssi = list(m.stride).index(16) if autobalance else 0  # stride 16 index
+        self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance
+        self.na = m.na  # number of anchors
+        self.nc = m.nc  # number of classes
+        self.nl = m.nl  # number of layers
+        self.nm = m.nm  # number of masks
+        self.anchors = m.anchors
+        self.device = device
+
+    def __call__(self, preds, targets, masks):  # predictions, targets, model
+        p, proto = preds
+        bs, nm, mask_h, mask_w = proto.shape  # batch size, number of masks, mask height, mask width
+        lcls = torch.zeros(1, device=self.device)
+        lbox = torch.zeros(1, device=self.device)
+        lobj = torch.zeros(1, device=self.device)
+        lseg = torch.zeros(1, device=self.device)
+        tcls, tbox, indices, anchors, tidxs, xywhn = self.build_targets(p, targets)  # targets
+
+        # Losses
+        for i, pi in enumerate(p):  # layer index, layer predictions
+            b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
+            tobj = torch.zeros(pi.shape[:4], dtype=pi.dtype, device=self.device)  # target obj
+
+            n = b.shape[0]  # number of targets
+            if n:
+                pxy, pwh, _, pcls, pmask = pi[b, a, gj, gi].split((2, 2, 1, self.nc, nm), 1)  # subset of predictions
+
+                # Box regression
+                pxy = pxy.sigmoid() * 2 - 0.5
+                pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i]
+                pbox = torch.cat((pxy, pwh), 1)  # predicted box
+                iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze()  # iou(prediction, target)
+                lbox += (1.0 - iou).mean()  # iou loss
+
+                # Objectness
+                iou = iou.detach().clamp(0).type(tobj.dtype)
+                if self.sort_obj_iou:
+                    j = iou.argsort()
+                    b, a, gj, gi, iou = b[j], a[j], gj[j], gi[j], iou[j]
+                if self.gr < 1:
+                    iou = (1.0 - self.gr) + self.gr * iou
+                tobj[b, a, gj, gi] = iou  # iou ratio
+
+                # Classification
+                if self.nc > 1:  # cls loss (only if multiple classes)
+                    t = torch.full_like(pcls, self.cn, device=self.device)  # targets
+                    t[range(n), tcls[i]] = self.cp
+                    lcls += self.BCEcls(pcls, t)  # BCE
+
+                # Mask regression
+                if tuple(masks.shape[-2:]) != (mask_h, mask_w):  # downsample
+                    masks = F.interpolate(masks[None], (mask_h, mask_w), mode='nearest')[0]
+                marea = xywhn[i][:, 2:].prod(1)  # mask width, height normalized
+                mxyxy = xywh2xyxy(xywhn[i] * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device))
+                for bi in b.unique():
+                    j = b == bi  # matching index
+                    if self.overlap:
+                        mask_gti = torch.where(masks[bi][None] == tidxs[i][j].view(-1, 1, 1), 1.0, 0.0)
+                    else:
+                        mask_gti = masks[tidxs[i]][j]
+                    lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxy[j], marea[j])
+
+            obji = self.BCEobj(pi[..., 4], tobj)
+            lobj += obji * self.balance[i]  # obj loss
+            if self.autobalance:
+                self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()
+
+        if self.autobalance:
+            self.balance = [x / self.balance[self.ssi] for x in self.balance]
+        lbox *= self.hyp['box']
+        lobj *= self.hyp['obj']
+        lcls *= self.hyp['cls']
+        lseg *= self.hyp['box'] / bs
+
+        loss = lbox + lobj + lcls + lseg
+        return loss * bs, torch.cat((lbox, lseg, lobj, lcls)).detach()
+
+    def single_mask_loss(self, gt_mask, pred, proto, xyxy, area):
+        # Mask loss for one image
+        pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:])  # (n,32) @ (32,80,80) -> (n,80,80)
+        loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction='none')
+        return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean()
+
+    def build_targets(self, p, targets):
+        # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
+        na, nt = self.na, targets.shape[0]  # number of anchors, targets
+        tcls, tbox, indices, anch, tidxs, xywhn = [], [], [], [], [], []
+        gain = torch.ones(8, device=self.device)  # normalized to gridspace gain
+        ai = torch.arange(na, device=self.device).float().view(na, 1).repeat(1, nt)  # same as .repeat_interleave(nt)
+        if self.overlap:
+            batch = p[0].shape[0]
+            ti = []
+            for i in range(batch):
+                num = (targets[:, 0] == i).sum()  # find number of targets of each image
+                ti.append(torch.arange(num, device=self.device).float().view(1, num).repeat(na, 1) + 1)  # (na, num)
+            ti = torch.cat(ti, 1)  # (na, nt)
+        else:
+            ti = torch.arange(nt, device=self.device).float().view(1, nt).repeat(na, 1)
+        targets = torch.cat((targets.repeat(na, 1, 1), ai[..., None], ti[..., None]), 2)  # append anchor indices
+
+        g = 0.5  # bias
+        off = torch.tensor(
+            [
+                [0, 0],
+                [1, 0],
+                [0, 1],
+                [-1, 0],
+                [0, -1],  # j,k,l,m
+                # [1, 1], [1, -1], [-1, 1], [-1, -1],  # jk,jm,lk,lm
+            ],
+            device=self.device).float() * g  # offsets
+
+        for i in range(self.nl):
+            anchors, shape = self.anchors[i], p[i].shape
+            gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]]  # xyxy gain
+
+            # Match targets to anchors
+            t = targets * gain  # shape(3,n,7)
+            if nt:
+                # Matches
+                r = t[..., 4:6] / anchors[:, None]  # wh ratio
+                j = torch.max(r, 1 / r).max(2)[0] < self.hyp['anchor_t']  # compare
+                # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
+                t = t[j]  # filter
+
+                # Offsets
+                gxy = t[:, 2:4]  # grid xy
+                gxi = gain[[2, 3]] - gxy  # inverse
+                j, k = ((gxy % 1 < g) & (gxy > 1)).T
+                l, m = ((gxi % 1 < g) & (gxi > 1)).T
+                j = torch.stack((torch.ones_like(j), j, k, l, m))
+                t = t.repeat((5, 1, 1))[j]
+                offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
+            else:
+                t = targets[0]
+                offsets = 0
+
+            # Define
+            bc, gxy, gwh, at = t.chunk(4, 1)  # (image, class), grid xy, grid wh, anchors
+            (a, tidx), (b, c) = at.long().T, bc.long().T  # anchors, image, class
+            gij = (gxy - offsets).long()
+            gi, gj = gij.T  # grid indices
+
+            # Append
+            indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1)))  # image, anchor, grid
+            tbox.append(torch.cat((gxy - gij, gwh), 1))  # box
+            anch.append(anchors[a])  # anchors
+            tcls.append(c)  # class
+            tidxs.append(tidx)
+            xywhn.append(torch.cat((gxy, gwh), 1) / gain[2:6])  # xywh normalized
+
+        return tcls, tbox, indices, anch, tidxs, xywhn
diff --git a/utils/segment/metrics.py b/utils/segment/metrics.py
new file mode 100644
index 000000000000..c9f137e38ead
--- /dev/null
+++ b/utils/segment/metrics.py
@@ -0,0 +1,210 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Model validation metrics
+"""
+
+import numpy as np
+
+from ..metrics import ap_per_class
+
+
+def fitness(x):
+    # Model fitness as a weighted combination of metrics
+    w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9]
+    return (x[:, :8] * w).sum(1)
+
+
+def ap_per_class_box_and_mask(
+        tp_m,
+        tp_b,
+        conf,
+        pred_cls,
+        target_cls,
+        plot=False,
+        save_dir='.',
+        names=(),
+):
+    """
+    Args:
+        tp_b: tp of boxes.
+        tp_m: tp of masks.
+        other arguments see `func: ap_per_class`.
+    """
+    results_boxes = ap_per_class(tp_b,
+                                 conf,
+                                 pred_cls,
+                                 target_cls,
+                                 plot=plot,
+                                 save_dir=save_dir,
+                                 names=names,
+                                 prefix='Box')[2:]
+    results_masks = ap_per_class(tp_m,
+                                 conf,
+                                 pred_cls,
+                                 target_cls,
+                                 plot=plot,
+                                 save_dir=save_dir,
+                                 names=names,
+                                 prefix='Mask')[2:]
+
+    results = {
+        'boxes': {
+            'p': results_boxes[0],
+            'r': results_boxes[1],
+            'ap': results_boxes[3],
+            'f1': results_boxes[2],
+            'ap_class': results_boxes[4]},
+        'masks': {
+            'p': results_masks[0],
+            'r': results_masks[1],
+            'ap': results_masks[3],
+            'f1': results_masks[2],
+            'ap_class': results_masks[4]}}
+    return results
+
+
+class Metric:
+
+    def __init__(self) -> None:
+        self.p = []  # (nc, )
+        self.r = []  # (nc, )
+        self.f1 = []  # (nc, )
+        self.all_ap = []  # (nc, 10)
+        self.ap_class_index = []  # (nc, )
+
+    @property
+    def ap50(self):
+        """AP@0.5 of all classes.
+        Return:
+            (nc, ) or [].
+        """
+        return self.all_ap[:, 0] if len(self.all_ap) else []
+
+    @property
+    def ap(self):
+        """AP@0.5:0.95
+        Return:
+            (nc, ) or [].
+        """
+        return self.all_ap.mean(1) if len(self.all_ap) else []
+
+    @property
+    def mp(self):
+        """mean precision of all classes.
+        Return:
+            float.
+        """
+        return self.p.mean() if len(self.p) else 0.0
+
+    @property
+    def mr(self):
+        """mean recall of all classes.
+        Return:
+            float.
+        """
+        return self.r.mean() if len(self.r) else 0.0
+
+    @property
+    def map50(self):
+        """Mean AP@0.5 of all classes.
+        Return:
+            float.
+        """
+        return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0
+
+    @property
+    def map(self):
+        """Mean AP@0.5:0.95 of all classes.
+        Return:
+            float.
+        """
+        return self.all_ap.mean() if len(self.all_ap) else 0.0
+
+    def mean_results(self):
+        """Mean of results, return mp, mr, map50, map"""
+        return (self.mp, self.mr, self.map50, self.map)
+
+    def class_result(self, i):
+        """class-aware result, return p[i], r[i], ap50[i], ap[i]"""
+        return (self.p[i], self.r[i], self.ap50[i], self.ap[i])
+
+    def get_maps(self, nc):
+        maps = np.zeros(nc) + self.map
+        for i, c in enumerate(self.ap_class_index):
+            maps[c] = self.ap[i]
+        return maps
+
+    def update(self, results):
+        """
+        Args:
+            results: tuple(p, r, ap, f1, ap_class)
+        """
+        p, r, all_ap, f1, ap_class_index = results
+        self.p = p
+        self.r = r
+        self.all_ap = all_ap
+        self.f1 = f1
+        self.ap_class_index = ap_class_index
+
+
+class Metrics:
+    """Metric for boxes and masks."""
+
+    def __init__(self) -> None:
+        self.metric_box = Metric()
+        self.metric_mask = Metric()
+
+    def update(self, results):
+        """
+        Args:
+            results: Dict{'boxes': Dict{}, 'masks': Dict{}}
+        """
+        self.metric_box.update(list(results['boxes'].values()))
+        self.metric_mask.update(list(results['masks'].values()))
+
+    def mean_results(self):
+        return self.metric_box.mean_results() + self.metric_mask.mean_results()
+
+    def class_result(self, i):
+        return self.metric_box.class_result(i) + self.metric_mask.class_result(i)
+
+    def get_maps(self, nc):
+        return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc)
+
+    @property
+    def ap_class_index(self):
+        # boxes and masks have the same ap_class_index
+        return self.metric_box.ap_class_index
+
+
+KEYS = [
+    'train/box_loss',
+    'train/seg_loss',  # train loss
+    'train/obj_loss',
+    'train/cls_loss',
+    'metrics/precision(B)',
+    'metrics/recall(B)',
+    'metrics/mAP_0.5(B)',
+    'metrics/mAP_0.5:0.95(B)',  # metrics
+    'metrics/precision(M)',
+    'metrics/recall(M)',
+    'metrics/mAP_0.5(M)',
+    'metrics/mAP_0.5:0.95(M)',  # metrics
+    'val/box_loss',
+    'val/seg_loss',  # val loss
+    'val/obj_loss',
+    'val/cls_loss',
+    'x/lr0',
+    'x/lr1',
+    'x/lr2',]
+
+BEST_KEYS = [
+    'best/epoch',
+    'best/precision(B)',
+    'best/recall(B)',
+    'best/mAP_0.5(B)',
+    'best/mAP_0.5:0.95(B)',
+    'best/precision(M)',
+    'best/recall(M)',
+    'best/mAP_0.5(M)',
+    'best/mAP_0.5:0.95(M)',]
diff --git a/utils/segment/plots.py b/utils/segment/plots.py
new file mode 100644
index 000000000000..1b22ec838ac9
--- /dev/null
+++ b/utils/segment/plots.py
@@ -0,0 +1,143 @@
+import contextlib
+import math
+from pathlib import Path
+
+import cv2
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import torch
+
+from .. import threaded
+from ..general import xywh2xyxy
+from ..plots import Annotator, colors
+
+
+@threaded
+def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg', names=None):
+    # Plot image grid with labels
+    if isinstance(images, torch.Tensor):
+        images = images.cpu().float().numpy()
+    if isinstance(targets, torch.Tensor):
+        targets = targets.cpu().numpy()
+    if isinstance(masks, torch.Tensor):
+        masks = masks.cpu().numpy().astype(int)
+
+    max_size = 1920  # max image size
+    max_subplots = 16  # max image subplots, i.e. 4x4
+    bs, _, h, w = images.shape  # batch size, _, height, width
+    bs = min(bs, max_subplots)  # limit plot images
+    ns = np.ceil(bs ** 0.5)  # number of subplots (square)
+    if np.max(images[0]) <= 1:
+        images *= 255  # de-normalise (optional)
+
+    # Build Image
+    mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8)  # init
+    for i, im in enumerate(images):
+        if i == max_subplots:  # if last batch has fewer images than we expect
+            break
+        x, y = int(w * (i // ns)), int(h * (i % ns))  # block origin
+        im = im.transpose(1, 2, 0)
+        mosaic[y:y + h, x:x + w, :] = im
+
+    # Resize (optional)
+    scale = max_size / ns / max(h, w)
+    if scale < 1:
+        h = math.ceil(scale * h)
+        w = math.ceil(scale * w)
+        mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h)))
+
+    # Annotate
+    fs = int((h + w) * ns * 0.01)  # font size
+    annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names)
+    for i in range(i + 1):
+        x, y = int(w * (i // ns)), int(h * (i % ns))  # block origin
+        annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2)  # borders
+        if paths:
+            annotator.text((x + 5, y + 5), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220))  # filenames
+        if len(targets) > 0:
+            idx = targets[:, 0] == i
+            ti = targets[idx]  # image targets
+
+            boxes = xywh2xyxy(ti[:, 2:6]).T
+            classes = ti[:, 1].astype('int')
+            labels = ti.shape[1] == 6  # labels if no conf column
+            conf = None if labels else ti[:, 6]  # check for confidence presence (label vs pred)
+
+            if boxes.shape[1]:
+                if boxes.max() <= 1.01:  # if normalized with tolerance 0.01
+                    boxes[[0, 2]] *= w  # scale to pixels
+                    boxes[[1, 3]] *= h
+                elif scale < 1:  # absolute coords need scale if image scales
+                    boxes *= scale
+            boxes[[0, 2]] += x
+            boxes[[1, 3]] += y
+            for j, box in enumerate(boxes.T.tolist()):
+                cls = classes[j]
+                color = colors(cls)
+                cls = names[cls] if names else cls
+                if labels or conf[j] > 0.25:  # 0.25 conf thresh
+                    label = f'{cls}' if labels else f'{cls} {conf[j]:.1f}'
+                    annotator.box_label(box, label, color=color)
+
+            # Plot masks
+            if len(masks):
+                if masks.max() > 1.0:  # mean that masks are overlap
+                    image_masks = masks[[i]]  # (1, 640, 640)
+                    nl = len(ti)
+                    index = np.arange(nl).reshape(nl, 1, 1) + 1
+                    image_masks = np.repeat(image_masks, nl, axis=0)
+                    image_masks = np.where(image_masks == index, 1.0, 0.0)
+                else:
+                    image_masks = masks[idx]
+
+                im = np.asarray(annotator.im).copy()
+                for j, box in enumerate(boxes.T.tolist()):
+                    if labels or conf[j] > 0.25:  # 0.25 conf thresh
+                        color = colors(classes[j])
+                        mh, mw = image_masks[j].shape
+                        if mh != h or mw != w:
+                            mask = image_masks[j].astype(np.uint8)
+                            mask = cv2.resize(mask, (w, h))
+                            mask = mask.astype(bool)
+                        else:
+                            mask = image_masks[j].astype(bool)
+                        with contextlib.suppress(Exception):
+                            im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6
+                annotator.fromarray(im)
+    annotator.im.save(fname)  # save
+
+
+def plot_results_with_masks(file='path/to/results.csv', dir='', best=True):
+    # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv')
+    save_dir = Path(file).parent if file else Path(dir)
+    fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True)
+    ax = ax.ravel()
+    files = list(save_dir.glob('results*.csv'))
+    assert len(files), f'No results.csv files found in {save_dir.resolve()}, nothing to plot.'
+    for f in files:
+        try:
+            data = pd.read_csv(f)
+            index = np.argmax(0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] +
+                              0.1 * data.values[:, 11])
+            s = [x.strip() for x in data.columns]
+            x = data.values[:, 0]
+            for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]):
+                y = data.values[:, j]
+                # y[y == 0] = np.nan  # don't show zero values
+                ax[i].plot(x, y, marker='.', label=f.stem, linewidth=2, markersize=2)
+                if best:
+                    # best
+                    ax[i].scatter(index, y[index], color='r', label=f'best:{index}', marker='*', linewidth=3)
+                    ax[i].set_title(s[j] + f'\n{round(y[index], 5)}')
+                else:
+                    # last
+                    ax[i].scatter(x[-1], y[-1], color='r', label='last', marker='*', linewidth=3)
+                    ax[i].set_title(s[j] + f'\n{round(y[-1], 5)}')
+                # if j in [8, 9, 10]:  # share train and val loss y axes
+                #     ax[i].get_shared_y_axes().join(ax[i], ax[i - 5])
+        except Exception as e:
+            print(f'Warning: Plotting error for {f}: {e}')
+    ax[1].legend()
+    fig.savefig(save_dir / 'results.png', dpi=200)
+    plt.close()
diff --git a/utils/torch_utils.py b/utils/torch_utils.py
index 9991e5ec87d8..5b67b3fa7a06 100644
--- a/utils/torch_utils.py
+++ b/utils/torch_utils.py
@@ -1,144 +1,212 @@
-# YOLOv5 PyTorch utils
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+PyTorch utils
+"""
 
-import datetime
-import logging
 import math
 import os
 import platform
 import subprocess
 import time
+import warnings
 from contextlib import contextmanager
 from copy import deepcopy
 from pathlib import Path
 
 import torch
-import torch.backends.cudnn as cudnn
+import torch.distributed as dist
 import torch.nn as nn
 import torch.nn.functional as F
-import torchvision
+from torch.nn.parallel import DistributedDataParallel as DDP
+
+from utils.general import LOGGER, check_version, colorstr, file_date, git_describe
+
+LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1))  # https://pytorch.org/docs/stable/elastic/run.html
+RANK = int(os.getenv('RANK', -1))
+WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))
 
 try:
-    import thop  # for FLOPS computation
+    import thop  # for FLOPs computation
 except ImportError:
     thop = None
-logger = logging.getLogger(__name__)
 
+# Suppress PyTorch warnings
+warnings.filterwarnings('ignore', message='User provided device_type of \'cuda\', but CUDA is not available. Disabling')
+warnings.filterwarnings('ignore', category=UserWarning)
 
-@contextmanager
-def torch_distributed_zero_first(local_rank: int):
-    """
-    Decorator to make all processes in distributed training wait for each local_master to do something.
-    """
-    if local_rank not in [-1, 0]:
-        torch.distributed.barrier()
-    yield
-    if local_rank == 0:
-        torch.distributed.barrier()
 
+def smart_inference_mode(torch_1_9=check_version(torch.__version__, '1.9.0')):
+    # Applies torch.inference_mode() decorator if torch>=1.9.0 else torch.no_grad() decorator
+    def decorate(fn):
+        return (torch.inference_mode if torch_1_9 else torch.no_grad)()(fn)
 
-def init_torch_seeds(seed=0):
-    # Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html
-    torch.manual_seed(seed)
-    if seed == 0:  # slower, more reproducible
-        cudnn.benchmark, cudnn.deterministic = False, True
-    else:  # faster, less reproducible
-        cudnn.benchmark, cudnn.deterministic = True, False
+    return decorate
 
 
-def date_modified(path=__file__):
-    # return human-readable file modification date, i.e. '2021-3-26'
-    t = datetime.datetime.fromtimestamp(Path(path).stat().st_mtime)
-    return f'{t.year}-{t.month}-{t.day}'
+def smartCrossEntropyLoss(label_smoothing=0.0):
+    # Returns nn.CrossEntropyLoss with label smoothing enabled for torch>=1.10.0
+    if check_version(torch.__version__, '1.10.0'):
+        return nn.CrossEntropyLoss(label_smoothing=label_smoothing)
+    if label_smoothing > 0:
+        LOGGER.warning(f'WARNING ⚠️ label smoothing {label_smoothing} requires torch>=1.10.0')
+    return nn.CrossEntropyLoss()
 
 
-def git_describe(path=Path(__file__).parent):  # path must be a directory
-    # return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe
-    s = f'git -C {path} describe --tags --long --always'
-    try:
-        return subprocess.check_output(s, shell=True, stderr=subprocess.STDOUT).decode()[:-1]
-    except subprocess.CalledProcessError as e:
-        return ''  # not a git repository
+def smart_DDP(model):
+    # Model DDP creation with checks
+    assert not check_version(torch.__version__, '1.12.0', pinned=True), \
+        'torch==1.12.0 torchvision==0.13.0 DDP training is not supported due to a known issue. ' \
+        'Please upgrade or downgrade torch to use DDP. See https://github.com/ultralytics/yolov5/issues/8395'
+    if check_version(torch.__version__, '1.11.0'):
+        return DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK, static_graph=True)
+    else:
+        return DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK)
+
+
+def reshape_classifier_output(model, n=1000):
+    # Update a TorchVision classification model to class count 'n' if required
+    from models.common import Classify
+    name, m = list((model.model if hasattr(model, 'model') else model).named_children())[-1]  # last module
+    if isinstance(m, Classify):  # YOLOv5 Classify() head
+        if m.linear.out_features != n:
+            m.linear = nn.Linear(m.linear.in_features, n)
+    elif isinstance(m, nn.Linear):  # ResNet, EfficientNet
+        if m.out_features != n:
+            setattr(model, name, nn.Linear(m.in_features, n))
+    elif isinstance(m, nn.Sequential):
+        types = [type(x) for x in m]
+        if nn.Linear in types:
+            i = types.index(nn.Linear)  # nn.Linear index
+            if m[i].out_features != n:
+                m[i] = nn.Linear(m[i].in_features, n)
+        elif nn.Conv2d in types:
+            i = types.index(nn.Conv2d)  # nn.Conv2d index
+            if m[i].out_channels != n:
+                m[i] = nn.Conv2d(m[i].in_channels, n, m[i].kernel_size, m[i].stride, bias=m[i].bias is not None)
+
+
+@contextmanager
+def torch_distributed_zero_first(local_rank: int):
+    # Decorator to make all processes in distributed training wait for each local_master to do something
+    if local_rank not in [-1, 0]:
+        dist.barrier(device_ids=[local_rank])
+    yield
+    if local_rank == 0:
+        dist.barrier(device_ids=[0])
 
 
-def select_device(device='', batch_size=None):
-    # device = 'cpu' or '0' or '0,1,2,3'
-    s = f'YOLOv5 🚀 {git_describe() or date_modified()} torch {torch.__version__} '  # string
-    cpu = device.lower() == 'cpu'
-    if cpu:
+def device_count():
+    # Returns number of CUDA devices available. Safe version of torch.cuda.device_count(). Supports Linux and Windows
+    assert platform.system() in ('Linux', 'Windows'), 'device_count() only supported on Linux or Windows'
+    try:
+        cmd = 'nvidia-smi -L | wc -l' if platform.system() == 'Linux' else 'nvidia-smi -L | find /c /v ""'  # Windows
+        return int(subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().split()[-1])
+    except Exception:
+        return 0
+
+
+def select_device(device='', batch_size=0, newline=True):
+    # device = None or 'cpu' or 0 or '0' or '0,1,2,3'
+    s = f'YOLOv5 🚀 {git_describe() or file_date()} Python-{platform.python_version()} torch-{torch.__version__} '
+    device = str(device).strip().lower().replace('cuda:', '').replace('none', '')  # to string, 'cuda:0' to '0'
+    cpu = device == 'cpu'
+    mps = device == 'mps'  # Apple Metal Performance Shaders (MPS)
+    if cpu or mps:
         os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # force torch.cuda.is_available() = False
     elif device:  # non-cpu device requested
-        os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable
-        assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested'  # check availability
-
-    cuda = not cpu and torch.cuda.is_available()
-    if cuda:
-        n = torch.cuda.device_count()
-        if n > 1 and batch_size:  # check that batch_size is compatible with device_count
+        os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable - must be before assert is_available()
+        assert torch.cuda.is_available() and torch.cuda.device_count() >= len(device.replace(',', '')), \
+            f"Invalid CUDA '--device {device}' requested, use '--device cpu' or pass valid CUDA device(s)"
+
+    if not cpu and not mps and torch.cuda.is_available():  # prefer GPU if available
+        devices = device.split(',') if device else '0'  # range(torch.cuda.device_count())  # i.e. 0,1,6,7
+        n = len(devices)  # device count
+        if n > 1 and batch_size > 0:  # check batch_size is divisible by device_count
             assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}'
-        space = ' ' * len(s)
-        for i, d in enumerate(device.split(',') if device else range(n)):
+        space = ' ' * (len(s) + 1)
+        for i, d in enumerate(devices):
             p = torch.cuda.get_device_properties(i)
-            s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / 1024 ** 2}MB)\n"  # bytes to MB
-    else:
+            s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / (1 << 20):.0f}MiB)\n"  # bytes to MB
+        arg = 'cuda:0'
+    elif mps and getattr(torch, 'has_mps', False) and torch.backends.mps.is_available():  # prefer MPS if available
+        s += 'MPS\n'
+        arg = 'mps'
+    else:  # revert to CPU
         s += 'CPU\n'
+        arg = 'cpu'
 
-    logger.info(s.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else s)  # emoji-safe
-    return torch.device('cuda:0' if cuda else 'cpu')
+    if not newline:
+        s = s.rstrip()
+    LOGGER.info(s)
+    return torch.device(arg)
 
 
-def time_synchronized():
-    # pytorch-accurate time
+def time_sync():
+    # PyTorch-accurate time
     if torch.cuda.is_available():
         torch.cuda.synchronize()
     return time.time()
 
 
-def profile(x, ops, n=100, device=None):
-    # profile a pytorch module or list of modules. Example usage:
-    #     x = torch.randn(16, 3, 640, 640)  # input
-    #     m1 = lambda x: x * torch.sigmoid(x)
-    #     m2 = nn.SiLU()
-    #     profile(x, [m1, m2], n=100)  # profile speed over 100 iterations
-
-    device = device or torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
-    x = x.to(device)
-    x.requires_grad = True
-    print(torch.__version__, device.type, torch.cuda.get_device_properties(0) if device.type == 'cuda' else '')
-    print(f"\n{'Params':>12s}{'GFLOPS':>12s}{'forward (ms)':>16s}{'backward (ms)':>16s}{'input':>24s}{'output':>24s}")
-    for m in ops if isinstance(ops, list) else [ops]:
-        m = m.to(device) if hasattr(m, 'to') else m  # device
-        m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m  # type
-        dtf, dtb, t = 0., 0., [0., 0., 0.]  # dt forward, backward
-        try:
-            flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2  # GFLOPS
-        except:
-            flops = 0
-
-        for _ in range(n):
-            t[0] = time_synchronized()
-            y = m(x)
-            t[1] = time_synchronized()
+def profile(input, ops, n=10, device=None):
+    """ YOLOv5 speed/memory/FLOPs profiler
+    Usage:
+        input = torch.randn(16, 3, 640, 640)
+        m1 = lambda x: x * torch.sigmoid(x)
+        m2 = nn.SiLU()
+        profile(input, [m1, m2], n=100)  # profile over 100 iterations
+    """
+    results = []
+    if not isinstance(device, torch.device):
+        device = select_device(device)
+    print(f"{'Params':>12s}{'GFLOPs':>12s}{'GPU_mem (GB)':>14s}{'forward (ms)':>14s}{'backward (ms)':>14s}"
+          f"{'input':>24s}{'output':>24s}")
+
+    for x in input if isinstance(input, list) else [input]:
+        x = x.to(device)
+        x.requires_grad = True
+        for m in ops if isinstance(ops, list) else [ops]:
+            m = m.to(device) if hasattr(m, 'to') else m  # device
+            m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m
+            tf, tb, t = 0, 0, [0, 0, 0]  # dt forward, backward
             try:
-                _ = y.sum().backward()
-                t[2] = time_synchronized()
-            except:  # no backward method
-                t[2] = float('nan')
-            dtf += (t[1] - t[0]) * 1000 / n  # ms per op forward
-            dtb += (t[2] - t[1]) * 1000 / n  # ms per op backward
+                flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2  # GFLOPs
+            except Exception:
+                flops = 0
 
-        s_in = tuple(x.shape) if isinstance(x, torch.Tensor) else 'list'
-        s_out = tuple(y.shape) if isinstance(y, torch.Tensor) else 'list'
-        p = sum(list(x.numel() for x in m.parameters())) if isinstance(m, nn.Module) else 0  # parameters
-        print(f'{p:12}{flops:12.4g}{dtf:16.4g}{dtb:16.4g}{str(s_in):>24s}{str(s_out):>24s}')
+            try:
+                for _ in range(n):
+                    t[0] = time_sync()
+                    y = m(x)
+                    t[1] = time_sync()
+                    try:
+                        _ = (sum(yi.sum() for yi in y) if isinstance(y, list) else y).sum().backward()
+                        t[2] = time_sync()
+                    except Exception:  # no backward method
+                        # print(e)  # for debug
+                        t[2] = float('nan')
+                    tf += (t[1] - t[0]) * 1000 / n  # ms per op forward
+                    tb += (t[2] - t[1]) * 1000 / n  # ms per op backward
+                mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0  # (GB)
+                s_in, s_out = (tuple(x.shape) if isinstance(x, torch.Tensor) else 'list' for x in (x, y))  # shapes
+                p = sum(x.numel() for x in m.parameters()) if isinstance(m, nn.Module) else 0  # parameters
+                print(f'{p:12}{flops:12.4g}{mem:>14.3f}{tf:14.4g}{tb:14.4g}{str(s_in):>24s}{str(s_out):>24s}')
+                results.append([p, flops, mem, tf, tb, s_in, s_out])
+            except Exception as e:
+                print(e)
+                results.append(None)
+            torch.cuda.empty_cache()
+    return results
 
 
 def is_parallel(model):
+    # Returns True if model is of type DP or DDP
     return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
 
 
-def intersect_dicts(da, db, exclude=()):
-    # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values
-    return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape}
+def de_parallel(model):
+    # De-parallelize a model: returns single-GPU model if model is of type DP or DDP
+    return model.module if is_parallel(model) else model
 
 
 def initialize_weights(model):
@@ -149,7 +217,7 @@ def initialize_weights(model):
         elif t is nn.BatchNorm2d:
             m.eps = 1e-3
             m.momentum = 0.03
-        elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
+        elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
             m.inplace = True
 
 
@@ -160,7 +228,7 @@ def find_modules(model, mclass=nn.Conv2d):
 
 def sparsity(model):
     # Return global model sparsity
-    a, b = 0., 0.
+    a, b = 0, 0
     for p in model.parameters():
         a += p.numel()
         b += (p == 0).sum()
@@ -170,30 +238,30 @@ def sparsity(model):
 def prune(model, amount=0.3):
     # Prune model to requested global sparsity
     import torch.nn.utils.prune as prune
-    print('Pruning model... ', end='')
     for name, m in model.named_modules():
         if isinstance(m, nn.Conv2d):
             prune.l1_unstructured(m, name='weight', amount=amount)  # prune
             prune.remove(m, 'weight')  # make permanent
-    print(' %.3g global sparsity' % sparsity(model))
+    LOGGER.info(f'Model pruned to {sparsity(model):.3g} global sparsity')
 
 
 def fuse_conv_and_bn(conv, bn):
-    # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/
+    # Fuse Conv2d() and BatchNorm2d() layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/
     fusedconv = nn.Conv2d(conv.in_channels,
                           conv.out_channels,
                           kernel_size=conv.kernel_size,
                           stride=conv.stride,
                           padding=conv.padding,
+                          dilation=conv.dilation,
                           groups=conv.groups,
                           bias=True).requires_grad_(False).to(conv.weight.device)
 
-    # prepare filters
+    # Prepare filters
     w_conv = conv.weight.clone().view(conv.out_channels, -1)
     w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
     fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape))
 
-    # prepare spatial bias
+    # Prepare spatial bias
     b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
     b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
     fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
@@ -201,60 +269,41 @@ def fuse_conv_and_bn(conv, bn):
     return fusedconv
 
 
-def model_info(model, verbose=False, img_size=640):
+def model_info(model, verbose=False, imgsz=640):
     # Model information. img_size may be int or list, i.e. img_size=640 or img_size=[640, 320]
     n_p = sum(x.numel() for x in model.parameters())  # number parameters
     n_g = sum(x.numel() for x in model.parameters() if x.requires_grad)  # number gradients
     if verbose:
-        print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
+        print(f"{'layer':>5} {'name':>40} {'gradient':>9} {'parameters':>12} {'shape':>20} {'mu':>10} {'sigma':>10}")
         for i, (name, p) in enumerate(model.named_parameters()):
             name = name.replace('module_list.', '')
             print('%5g %40s %9s %12g %20s %10.3g %10.3g' %
                   (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
 
-    try:  # FLOPS
-        from thop import profile
-        stride = max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32
-        img = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), device=next(model.parameters()).device)  # input
-        flops = profile(deepcopy(model), inputs=(img,), verbose=False)[0] / 1E9 * 2  # stride GFLOPS
-        img_size = img_size if isinstance(img_size, list) else [img_size, img_size]  # expand if int/float
-        fs = ', %.1f GFLOPS' % (flops * img_size[0] / stride * img_size[1] / stride)  # 640x640 GFLOPS
-    except (ImportError, Exception):
+    try:  # FLOPs
+        p = next(model.parameters())
+        stride = max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32  # max stride
+        im = torch.empty((1, p.shape[1], stride, stride), device=p.device)  # input image in BCHW format
+        flops = thop.profile(deepcopy(model), inputs=(im,), verbose=False)[0] / 1E9 * 2  # stride GFLOPs
+        imgsz = imgsz if isinstance(imgsz, list) else [imgsz, imgsz]  # expand if int/float
+        fs = f', {flops * imgsz[0] / stride * imgsz[1] / stride:.1f} GFLOPs'  # 640x640 GFLOPs
+    except Exception:
         fs = ''
 
-    logger.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}")
-
-
-def load_classifier(name='resnet101', n=2):
-    # Loads a pretrained model reshaped to n-class output
-    model = torchvision.models.__dict__[name](pretrained=True)
-
-    # ResNet model properties
-    # input_size = [3, 224, 224]
-    # input_space = 'RGB'
-    # input_range = [0, 1]
-    # mean = [0.485, 0.456, 0.406]
-    # std = [0.229, 0.224, 0.225]
-
-    # Reshape output to n classes
-    filters = model.fc.weight.shape[1]
-    model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True)
-    model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True)
-    model.fc.out_features = n
-    return model
+    name = Path(model.yaml_file).stem.replace('yolov5', 'YOLOv5') if hasattr(model, 'yaml_file') else 'Model'
+    LOGGER.info(f'{name} summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}')
 
 
 def scale_img(img, ratio=1.0, same_shape=False, gs=32):  # img(16,3,256,416)
-    # scales img(bs,3,y,x) by ratio constrained to gs-multiple
+    # Scales img(bs,3,y,x) by ratio constrained to gs-multiple
     if ratio == 1.0:
         return img
-    else:
-        h, w = img.shape[2:]
-        s = (int(h * ratio), int(w * ratio))  # new size
-        img = F.interpolate(img, size=s, mode='bilinear', align_corners=False)  # resize
-        if not same_shape:  # pad/crop img
-            h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)]
-        return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447)  # value = imagenet mean
+    h, w = img.shape[2:]
+    s = (int(h * ratio), int(w * ratio))  # new size
+    img = F.interpolate(img, size=s, mode='bilinear', align_corners=False)  # resize
+    if not same_shape:  # pad/crop img
+        h, w = (math.ceil(x * ratio / gs) * gs for x in (h, w))
+    return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447)  # value = imagenet mean
 
 
 def copy_attr(a, b, include=(), exclude=()):
@@ -266,37 +315,117 @@ def copy_attr(a, b, include=(), exclude=()):
             setattr(a, k, v)
 
 
+def smart_optimizer(model, name='Adam', lr=0.001, momentum=0.9, decay=1e-5):
+    # YOLOv5 3-param group optimizer: 0) weights with decay, 1) weights no decay, 2) biases no decay
+    g = [], [], []  # optimizer parameter groups
+    bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k)  # normalization layers, i.e. BatchNorm2d()
+    for v in model.modules():
+        for p_name, p in v.named_parameters(recurse=0):
+            if p_name == 'bias':  # bias (no decay)
+                g[2].append(p)
+            elif p_name == 'weight' and isinstance(v, bn):  # weight (no decay)
+                g[1].append(p)
+            else:
+                g[0].append(p)  # weight (with decay)
+
+    if name == 'Adam':
+        optimizer = torch.optim.Adam(g[2], lr=lr, betas=(momentum, 0.999))  # adjust beta1 to momentum
+    elif name == 'AdamW':
+        optimizer = torch.optim.AdamW(g[2], lr=lr, betas=(momentum, 0.999), weight_decay=0.0)
+    elif name == 'RMSProp':
+        optimizer = torch.optim.RMSprop(g[2], lr=lr, momentum=momentum)
+    elif name == 'SGD':
+        optimizer = torch.optim.SGD(g[2], lr=lr, momentum=momentum, nesterov=True)
+    else:
+        raise NotImplementedError(f'Optimizer {name} not implemented.')
+
+    optimizer.add_param_group({'params': g[0], 'weight_decay': decay})  # add g0 with weight_decay
+    optimizer.add_param_group({'params': g[1], 'weight_decay': 0.0})  # add g1 (BatchNorm2d weights)
+    LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}) with parameter groups "
+                f'{len(g[1])} weight(decay=0.0), {len(g[0])} weight(decay={decay}), {len(g[2])} bias')
+    return optimizer
+
+
+def smart_hub_load(repo='ultralytics/yolov5', model='yolov5s', **kwargs):
+    # YOLOv5 torch.hub.load() wrapper with smart error/issue handling
+    if check_version(torch.__version__, '1.9.1'):
+        kwargs['skip_validation'] = True  # validation causes GitHub API rate limit errors
+    if check_version(torch.__version__, '1.12.0'):
+        kwargs['trust_repo'] = True  # argument required starting in torch 0.12
+    try:
+        return torch.hub.load(repo, model, **kwargs)
+    except Exception:
+        return torch.hub.load(repo, model, force_reload=True, **kwargs)
+
+
+def smart_resume(ckpt, optimizer, ema=None, weights='yolov5s.pt', epochs=300, resume=True):
+    # Resume training from a partially trained checkpoint
+    best_fitness = 0.0
+    start_epoch = ckpt['epoch'] + 1
+    if ckpt['optimizer'] is not None:
+        optimizer.load_state_dict(ckpt['optimizer'])  # optimizer
+        best_fitness = ckpt['best_fitness']
+    if ema and ckpt.get('ema'):
+        ema.ema.load_state_dict(ckpt['ema'].float().state_dict())  # EMA
+        ema.updates = ckpt['updates']
+    if resume:
+        assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.\n' \
+                                f"Start a new training without --resume, i.e. 'python train.py --weights {weights}'"
+        LOGGER.info(f'Resuming training from {weights} from epoch {start_epoch} to {epochs} total epochs')
+    if epochs < start_epoch:
+        LOGGER.info(f"{weights} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {epochs} more epochs.")
+        epochs += ckpt['epoch']  # finetune additional epochs
+    return best_fitness, start_epoch, epochs
+
+
+class EarlyStopping:
+    # YOLOv5 simple early stopper
+    def __init__(self, patience=30):
+        self.best_fitness = 0.0  # i.e. mAP
+        self.best_epoch = 0
+        self.patience = patience or float('inf')  # epochs to wait after fitness stops improving to stop
+        self.possible_stop = False  # possible stop may occur next epoch
+
+    def __call__(self, epoch, fitness):
+        if fitness >= self.best_fitness:  # >= 0 to allow for early zero-fitness stage of training
+            self.best_epoch = epoch
+            self.best_fitness = fitness
+        delta = epoch - self.best_epoch  # epochs without improvement
+        self.possible_stop = delta >= (self.patience - 1)  # possible stop may occur next epoch
+        stop = delta >= self.patience  # stop training if patience exceeded
+        if stop:
+            LOGGER.info(f'Stopping training early as no improvement observed in last {self.patience} epochs. '
+                        f'Best results observed at epoch {self.best_epoch}, best model saved as best.pt.\n'
+                        f'To update EarlyStopping(patience={self.patience}) pass a new patience value, '
+                        f'i.e. `python train.py --patience 300` or use `--patience 0` to disable EarlyStopping.')
+        return stop
+
+
 class ModelEMA:
-    """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
-    Keep a moving average of everything in the model state_dict (parameters and buffers).
-    This is intended to allow functionality like
-    https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
-    A smoothed version of the weights is necessary for some training schemes to perform well.
-    This class is sensitive where it is initialized in the sequence of model init,
-    GPU assignment and distributed training wrappers.
+    """ Updated Exponential Moving Average (EMA) from https://github.com/rwightman/pytorch-image-models
+    Keeps a moving average of everything in the model state_dict (parameters and buffers)
+    For EMA details see https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
     """
 
-    def __init__(self, model, decay=0.9999, updates=0):
+    def __init__(self, model, decay=0.9999, tau=2000, updates=0):
         # Create EMA
-        self.ema = deepcopy(model.module if is_parallel(model) else model).eval()  # FP32 EMA
-        # if next(model.parameters()).device.type != 'cpu':
-        #     self.ema.half()  # FP16 EMA
+        self.ema = deepcopy(de_parallel(model)).eval()  # FP32 EMA
         self.updates = updates  # number of EMA updates
-        self.decay = lambda x: decay * (1 - math.exp(-x / 2000))  # decay exponential ramp (to help early epochs)
+        self.decay = lambda x: decay * (1 - math.exp(-x / tau))  # decay exponential ramp (to help early epochs)
         for p in self.ema.parameters():
             p.requires_grad_(False)
 
     def update(self, model):
         # Update EMA parameters
-        with torch.no_grad():
-            self.updates += 1
-            d = self.decay(self.updates)
-
-            msd = model.module.state_dict() if is_parallel(model) else model.state_dict()  # model state_dict
-            for k, v in self.ema.state_dict().items():
-                if v.dtype.is_floating_point:
-                    v *= d
-                    v += (1. - d) * msd[k].detach()
+        self.updates += 1
+        d = self.decay(self.updates)
+
+        msd = de_parallel(model).state_dict()  # model state_dict
+        for k, v in self.ema.state_dict().items():
+            if v.dtype.is_floating_point:  # true for FP16 and FP32
+                v *= d
+                v += (1 - d) * msd[k].detach()
+        # assert v.dtype == msd[k].dtype == torch.float32, f'{k}: EMA {v.dtype} and model {msd[k].dtype} must be FP32'
 
     def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
         # Update EMA attributes
diff --git a/utils/triton.py b/utils/triton.py
new file mode 100644
index 000000000000..25928021477e
--- /dev/null
+++ b/utils/triton.py
@@ -0,0 +1,85 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+""" Utils to interact with the Triton Inference Server
+"""
+
+import typing
+from urllib.parse import urlparse
+
+import torch
+
+
+class TritonRemoteModel:
+    """ A wrapper over a model served by the Triton Inference Server. It can
+    be configured to communicate over GRPC or HTTP. It accepts Torch Tensors
+    as input and returns them as outputs.
+    """
+
+    def __init__(self, url: str):
+        """
+        Keyword arguments:
+        url: Fully qualified address of the Triton server - for e.g. grpc://localhost:8000
+        """
+
+        parsed_url = urlparse(url)
+        if parsed_url.scheme == 'grpc':
+            from tritonclient.grpc import InferenceServerClient, InferInput
+
+            self.client = InferenceServerClient(parsed_url.netloc)  # Triton GRPC client
+            model_repository = self.client.get_model_repository_index()
+            self.model_name = model_repository.models[0].name
+            self.metadata = self.client.get_model_metadata(self.model_name, as_json=True)
+
+            def create_input_placeholders() -> typing.List[InferInput]:
+                return [
+                    InferInput(i['name'], [int(s) for s in i['shape']], i['datatype']) for i in self.metadata['inputs']]
+
+        else:
+            from tritonclient.http import InferenceServerClient, InferInput
+
+            self.client = InferenceServerClient(parsed_url.netloc)  # Triton HTTP client
+            model_repository = self.client.get_model_repository_index()
+            self.model_name = model_repository[0]['name']
+            self.metadata = self.client.get_model_metadata(self.model_name)
+
+            def create_input_placeholders() -> typing.List[InferInput]:
+                return [
+                    InferInput(i['name'], [int(s) for s in i['shape']], i['datatype']) for i in self.metadata['inputs']]
+
+        self._create_input_placeholders_fn = create_input_placeholders
+
+    @property
+    def runtime(self):
+        """Returns the model runtime"""
+        return self.metadata.get('backend', self.metadata.get('platform'))
+
+    def __call__(self, *args, **kwargs) -> typing.Union[torch.Tensor, typing.Tuple[torch.Tensor, ...]]:
+        """ Invokes the model. Parameters can be provided via args or kwargs.
+        args, if provided, are assumed to match the order of inputs of the model.
+        kwargs are matched with the model input names.
+        """
+        inputs = self._create_inputs(*args, **kwargs)
+        response = self.client.infer(model_name=self.model_name, inputs=inputs)
+        result = []
+        for output in self.metadata['outputs']:
+            tensor = torch.as_tensor(response.as_numpy(output['name']))
+            result.append(tensor)
+        return result[0] if len(result) == 1 else result
+
+    def _create_inputs(self, *args, **kwargs):
+        args_len, kwargs_len = len(args), len(kwargs)
+        if not args_len and not kwargs_len:
+            raise RuntimeError('No inputs provided.')
+        if args_len and kwargs_len:
+            raise RuntimeError('Cannot specify args and kwargs at the same time')
+
+        placeholders = self._create_input_placeholders_fn()
+        if args_len:
+            if args_len != len(placeholders):
+                raise RuntimeError(f'Expected {len(placeholders)} inputs, got {args_len}.')
+            for input, value in zip(placeholders, args):
+                input.set_data_from_numpy(value.cpu().numpy())
+        else:
+            for input in placeholders:
+                value = kwargs[input.name]
+                input.set_data_from_numpy(value.cpu().numpy())
+        return placeholders
diff --git a/utils/wandb_logging/log_dataset.py b/utils/wandb_logging/log_dataset.py
deleted file mode 100644
index f45a23011f15..000000000000
--- a/utils/wandb_logging/log_dataset.py
+++ /dev/null
@@ -1,24 +0,0 @@
-import argparse
-
-import yaml
-
-from wandb_utils import WandbLogger
-
-WANDB_ARTIFACT_PREFIX = 'wandb-artifact://'
-
-
-def create_dataset_artifact(opt):
-    with open(opt.data) as f:
-        data = yaml.safe_load(f)  # data dict
-    logger = WandbLogger(opt, '', None, data, job_type='Dataset Creation')
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path')
-    parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
-    parser.add_argument('--project', type=str, default='YOLOv5', help='name of W&B Project')
-    opt = parser.parse_args()
-    opt.resume = False  # Explicitly disallow resume check for dataset upload job
-
-    create_dataset_artifact(opt)
diff --git a/utils/wandb_logging/wandb_utils.py b/utils/wandb_logging/wandb_utils.py
deleted file mode 100644
index d8fbd1ef42aa..000000000000
--- a/utils/wandb_logging/wandb_utils.py
+++ /dev/null
@@ -1,306 +0,0 @@
-import json
-import sys
-from pathlib import Path
-
-import torch
-import yaml
-from tqdm import tqdm
-
-sys.path.append(str(Path(__file__).parent.parent.parent))  # add utils/ to path
-from utils.datasets import LoadImagesAndLabels
-from utils.datasets import img2label_paths
-from utils.general import colorstr, xywh2xyxy, check_dataset
-
-try:
-    import wandb
-    from wandb import init, finish
-except ImportError:
-    wandb = None
-
-WANDB_ARTIFACT_PREFIX = 'wandb-artifact://'
-
-
-def remove_prefix(from_string, prefix=WANDB_ARTIFACT_PREFIX):
-    return from_string[len(prefix):]
-
-
-def check_wandb_config_file(data_config_file):
-    wandb_config = '_wandb.'.join(data_config_file.rsplit('.', 1))  # updated data.yaml path
-    if Path(wandb_config).is_file():
-        return wandb_config
-    return data_config_file
-
-
-def get_run_info(run_path):
-    run_path = Path(remove_prefix(run_path, WANDB_ARTIFACT_PREFIX))
-    run_id = run_path.stem
-    project = run_path.parent.stem
-    model_artifact_name = 'run_' + run_id + '_model'
-    return run_id, project, model_artifact_name
-
-
-def check_wandb_resume(opt):
-    process_wandb_config_ddp_mode(opt) if opt.global_rank not in [-1, 0] else None
-    if isinstance(opt.resume, str):
-        if opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
-            if opt.global_rank not in [-1, 0]:  # For resuming DDP runs
-                run_id, project, model_artifact_name = get_run_info(opt.resume)
-                api = wandb.Api()
-                artifact = api.artifact(project + '/' + model_artifact_name + ':latest')
-                modeldir = artifact.download()
-                opt.weights = str(Path(modeldir) / "last.pt")
-            return True
-    return None
-
-
-def process_wandb_config_ddp_mode(opt):
-    with open(opt.data) as f:
-        data_dict = yaml.safe_load(f)  # data dict
-    train_dir, val_dir = None, None
-    if isinstance(data_dict['train'], str) and data_dict['train'].startswith(WANDB_ARTIFACT_PREFIX):
-        api = wandb.Api()
-        train_artifact = api.artifact(remove_prefix(data_dict['train']) + ':' + opt.artifact_alias)
-        train_dir = train_artifact.download()
-        train_path = Path(train_dir) / 'data/images/'
-        data_dict['train'] = str(train_path)
-
-    if isinstance(data_dict['val'], str) and data_dict['val'].startswith(WANDB_ARTIFACT_PREFIX):
-        api = wandb.Api()
-        val_artifact = api.artifact(remove_prefix(data_dict['val']) + ':' + opt.artifact_alias)
-        val_dir = val_artifact.download()
-        val_path = Path(val_dir) / 'data/images/'
-        data_dict['val'] = str(val_path)
-    if train_dir or val_dir:
-        ddp_data_path = str(Path(val_dir) / 'wandb_local_data.yaml')
-        with open(ddp_data_path, 'w') as f:
-            yaml.safe_dump(data_dict, f)
-        opt.data = ddp_data_path
-
-
-class WandbLogger():
-    def __init__(self, opt, name, run_id, data_dict, job_type='Training'):
-        # Pre-training routine --
-        self.job_type = job_type
-        self.wandb, self.wandb_run, self.data_dict = wandb, None if not wandb else wandb.run, data_dict
-        # It's more elegant to stick to 1 wandb.init call, but useful config data is overwritten in the WandbLogger's wandb.init call
-        if isinstance(opt.resume, str):  # checks resume from artifact
-            if opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
-                run_id, project, model_artifact_name = get_run_info(opt.resume)
-                model_artifact_name = WANDB_ARTIFACT_PREFIX + model_artifact_name
-                assert wandb, 'install wandb to resume wandb runs'
-                # Resume wandb-artifact:// runs here| workaround for not overwriting wandb.config
-                self.wandb_run = wandb.init(id=run_id, project=project, resume='allow')
-                opt.resume = model_artifact_name
-        elif self.wandb:
-            self.wandb_run = wandb.init(config=opt,
-                                        resume="allow",
-                                        project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem,
-                                        name=name,
-                                        job_type=job_type,
-                                        id=run_id) if not wandb.run else wandb.run
-        if self.wandb_run:
-            if self.job_type == 'Training':
-                if not opt.resume:
-                    wandb_data_dict = self.check_and_upload_dataset(opt) if opt.upload_dataset else data_dict
-                    # Info useful for resuming from artifacts
-                    self.wandb_run.config.opt = vars(opt)
-                    self.wandb_run.config.data_dict = wandb_data_dict
-                self.data_dict = self.setup_training(opt, data_dict)
-            if self.job_type == 'Dataset Creation':
-                self.data_dict = self.check_and_upload_dataset(opt)
-        else:
-            prefix = colorstr('wandb: ')
-            print(f"{prefix}Install Weights & Biases for YOLOv5 logging with 'pip install wandb' (recommended)")
-
-    def check_and_upload_dataset(self, opt):
-        assert wandb, 'Install wandb to upload dataset'
-        check_dataset(self.data_dict)
-        config_path = self.log_dataset_artifact(opt.data,
-                                                opt.single_cls,
-                                                'YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem)
-        print("Created dataset config file ", config_path)
-        with open(config_path) as f:
-            wandb_data_dict = yaml.safe_load(f)
-        return wandb_data_dict
-
-    def setup_training(self, opt, data_dict):
-        self.log_dict, self.current_epoch, self.log_imgs = {}, 0, 16  # Logging Constants
-        self.bbox_interval = opt.bbox_interval
-        if isinstance(opt.resume, str):
-            modeldir, _ = self.download_model_artifact(opt)
-            if modeldir:
-                self.weights = Path(modeldir) / "last.pt"
-                config = self.wandb_run.config
-                opt.weights, opt.save_period, opt.batch_size, opt.bbox_interval, opt.epochs, opt.hyp = str(
-                    self.weights), config.save_period, config.total_batch_size, config.bbox_interval, config.epochs, \
-                                                                                                       config.opt['hyp']
-            data_dict = dict(self.wandb_run.config.data_dict)  # eliminates the need for config file to resume
-        if 'val_artifact' not in self.__dict__:  # If --upload_dataset is set, use the existing artifact, don't download
-            self.train_artifact_path, self.train_artifact = self.download_dataset_artifact(data_dict.get('train'),
-                                                                                           opt.artifact_alias)
-            self.val_artifact_path, self.val_artifact = self.download_dataset_artifact(data_dict.get('val'),
-                                                                                       opt.artifact_alias)
-            self.result_artifact, self.result_table, self.val_table, self.weights = None, None, None, None
-            if self.train_artifact_path is not None:
-                train_path = Path(self.train_artifact_path) / 'data/images/'
-                data_dict['train'] = str(train_path)
-            if self.val_artifact_path is not None:
-                val_path = Path(self.val_artifact_path) / 'data/images/'
-                data_dict['val'] = str(val_path)
-                self.val_table = self.val_artifact.get("val")
-                self.map_val_table_path()
-        if self.val_artifact is not None:
-            self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation")
-            self.result_table = wandb.Table(["epoch", "id", "prediction", "avg_confidence"])
-        if opt.bbox_interval == -1:
-            self.bbox_interval = opt.bbox_interval = (opt.epochs // 10) if opt.epochs > 10 else 1
-        return data_dict
-
-    def download_dataset_artifact(self, path, alias):
-        if isinstance(path, str) and path.startswith(WANDB_ARTIFACT_PREFIX):
-            dataset_artifact = wandb.use_artifact(remove_prefix(path, WANDB_ARTIFACT_PREFIX) + ":" + alias)
-            assert dataset_artifact is not None, "'Error: W&B dataset artifact doesn\'t exist'"
-            datadir = dataset_artifact.download()
-            return datadir, dataset_artifact
-        return None, None
-
-    def download_model_artifact(self, opt):
-        if opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
-            model_artifact = wandb.use_artifact(remove_prefix(opt.resume, WANDB_ARTIFACT_PREFIX) + ":latest")
-            assert model_artifact is not None, 'Error: W&B model artifact doesn\'t exist'
-            modeldir = model_artifact.download()
-            epochs_trained = model_artifact.metadata.get('epochs_trained')
-            total_epochs = model_artifact.metadata.get('total_epochs')
-            assert epochs_trained < total_epochs, 'training to %g epochs is finished, nothing to resume.' % (
-                total_epochs)
-            return modeldir, model_artifact
-        return None, None
-
-    def log_model(self, path, opt, epoch, fitness_score, best_model=False):
-        model_artifact = wandb.Artifact('run_' + wandb.run.id + '_model', type='model', metadata={
-            'original_url': str(path),
-            'epochs_trained': epoch + 1,
-            'save period': opt.save_period,
-            'project': opt.project,
-            'total_epochs': opt.epochs,
-            'fitness_score': fitness_score
-        })
-        model_artifact.add_file(str(path / 'last.pt'), name='last.pt')
-        wandb.log_artifact(model_artifact,
-                           aliases=['latest', 'epoch ' + str(self.current_epoch), 'best' if best_model else ''])
-        print("Saving model artifact on epoch ", epoch + 1)
-
-    def log_dataset_artifact(self, data_file, single_cls, project, overwrite_config=False):
-        with open(data_file) as f:
-            data = yaml.safe_load(f)  # data dict
-        nc, names = (1, ['item']) if single_cls else (int(data['nc']), data['names'])
-        names = {k: v for k, v in enumerate(names)}  # to index dictionary
-        self.train_artifact = self.create_dataset_table(LoadImagesAndLabels(
-            data['train']), names, name='train') if data.get('train') else None
-        self.val_artifact = self.create_dataset_table(LoadImagesAndLabels(
-            data['val']), names, name='val') if data.get('val') else None
-        if data.get('train'):
-            data['train'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'train')
-        if data.get('val'):
-            data['val'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'val')
-        path = data_file if overwrite_config else '_wandb.'.join(data_file.rsplit('.', 1))  # updated data.yaml path
-        data.pop('download', None)
-        with open(path, 'w') as f:
-            yaml.safe_dump(data, f)
-
-        if self.job_type == 'Training':  # builds correct artifact pipeline graph
-            self.wandb_run.use_artifact(self.val_artifact)
-            self.wandb_run.use_artifact(self.train_artifact)
-            self.val_artifact.wait()
-            self.val_table = self.val_artifact.get('val')
-            self.map_val_table_path()
-        else:
-            self.wandb_run.log_artifact(self.train_artifact)
-            self.wandb_run.log_artifact(self.val_artifact)
-        return path
-
-    def map_val_table_path(self):
-        self.val_table_map = {}
-        print("Mapping dataset")
-        for i, data in enumerate(tqdm(self.val_table.data)):
-            self.val_table_map[data[3]] = data[0]
-
-    def create_dataset_table(self, dataset, class_to_id, name='dataset'):
-        # TODO: Explore multiprocessing to slpit this loop parallely| This is essential for speeding up the the logging
-        artifact = wandb.Artifact(name=name, type="dataset")
-        img_files = tqdm([dataset.path]) if isinstance(dataset.path, str) and Path(dataset.path).is_dir() else None
-        img_files = tqdm(dataset.img_files) if not img_files else img_files
-        for img_file in img_files:
-            if Path(img_file).is_dir():
-                artifact.add_dir(img_file, name='data/images')
-                labels_path = 'labels'.join(dataset.path.rsplit('images', 1))
-                artifact.add_dir(labels_path, name='data/labels')
-            else:
-                artifact.add_file(img_file, name='data/images/' + Path(img_file).name)
-                label_file = Path(img2label_paths([img_file])[0])
-                artifact.add_file(str(label_file),
-                                  name='data/labels/' + label_file.name) if label_file.exists() else None
-        table = wandb.Table(columns=["id", "train_image", "Classes", "name"])
-        class_set = wandb.Classes([{'id': id, 'name': name} for id, name in class_to_id.items()])
-        for si, (img, labels, paths, shapes) in enumerate(tqdm(dataset)):
-            height, width = shapes[0]
-            labels[:, 2:] = (xywh2xyxy(labels[:, 2:].view(-1, 4))) * torch.Tensor([width, height, width, height])
-            box_data, img_classes = [], {}
-            for cls, *xyxy in labels[:, 1:].tolist():
-                cls = int(cls)
-                box_data.append({"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
-                                 "class_id": cls,
-                                 "box_caption": "%s" % (class_to_id[cls]),
-                                 "scores": {"acc": 1},
-                                 "domain": "pixel"})
-                img_classes[cls] = class_to_id[cls]
-            boxes = {"ground_truth": {"box_data": box_data, "class_labels": class_to_id}}  # inference-space
-            table.add_data(si, wandb.Image(paths, classes=class_set, boxes=boxes), json.dumps(img_classes),
-                           Path(paths).name)
-        artifact.add(table, name)
-        return artifact
-
-    def log_training_progress(self, predn, path, names):
-        if self.val_table and self.result_table:
-            class_set = wandb.Classes([{'id': id, 'name': name} for id, name in names.items()])
-            box_data = []
-            total_conf = 0
-            for *xyxy, conf, cls in predn.tolist():
-                if conf >= 0.25:
-                    box_data.append(
-                        {"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
-                         "class_id": int(cls),
-                         "box_caption": "%s %.3f" % (names[cls], conf),
-                         "scores": {"class_score": conf},
-                         "domain": "pixel"})
-                    total_conf = total_conf + conf
-            boxes = {"predictions": {"box_data": box_data, "class_labels": names}}  # inference-space
-            id = self.val_table_map[Path(path).name]
-            self.result_table.add_data(self.current_epoch,
-                                       id,
-                                       wandb.Image(self.val_table.data[id][1], boxes=boxes, classes=class_set),
-                                       total_conf / max(1, len(box_data))
-                                       )
-
-    def log(self, log_dict):
-        if self.wandb_run:
-            for key, value in log_dict.items():
-                self.log_dict[key] = value
-
-    def end_epoch(self, best_result=False):
-        if self.wandb_run:
-            wandb.log(self.log_dict)
-            self.log_dict = {}
-            if self.result_artifact:
-                train_results = wandb.JoinedTable(self.val_table, self.result_table, "id")
-                self.result_artifact.add(train_results, 'result')
-                wandb.log_artifact(self.result_artifact, aliases=['latest', 'epoch ' + str(self.current_epoch),
-                                                                  ('best' if best_result else '')])
-                self.result_table = wandb.Table(["epoch", "id", "prediction", "avg_confidence"])
-                self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation")
-
-    def finish_run(self):
-        if self.wandb_run:
-            if self.log_dict:
-                wandb.log(self.log_dict)
-            wandb.run.finish()
diff --git a/weights/download_weights.sh b/weights/download_weights.sh
deleted file mode 100755
index 1485ed1d1cac..000000000000
--- a/weights/download_weights.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/bash
-# Download latest models from https://github.com/ultralytics/yolov5/releases
-# Usage:
-#    $ bash weights/download_weights.sh
-
-/opt/conda/bin/python3.8 - <<EOF
-from utils.google_utils import attempt_download
-
-for x in ['s', 'm', 'l', 'x']:
-    attempt_download(f'yolov5{x}.pt')
-
-EOF