# midi_tempo_detective.py

# -*- coding: utf-8 -*-
"""MIDI_Tempo_Detective.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1mEp3FT9f4tliugQVV-nVojeR_FYLpfIP

# MIDI Tempo Detective (ver. 1.0)

***

Powered by tegridy-tools: https://github.com/asigalov61/tegridy-tools

***

Credit for GPT2-RGA code used in this colab goes out @ Sashmark97 https://github.com/Sashmark97/midigen and @ Damon Gwinn https://github.com/gwinndr/MusicTransformer-Pytorch

***

WARNING: This complete implementation is a functioning model of the Artificial Intelligence. Please exercise great humility, care, and respect. https://www.nscai.gov/

***

#### Project Los Angeles

#### Tegridy Code 2022

***

# (Setup Environment)
"""

#@title nvidia-smi gpu check
!nvidia-smi

#@title Install all dependencies (run only once per session)

!git clone https://github.com/asigalov61/MIDI-Tempo-Detective
!pip install torch
!pip install tqdm
!pip install matplotlib

!pip install torch-summary
!pip install sklearn

#@title Import all needed modules

print('Loading needed modules. Please wait...')
import os
from tqdm import tqdm
import random
import secrets
from collections import OrderedDict

print('Loading TMIDIX and GPT2RGAX modules...')
os.chdir('/content/MIDI-Tempo-Detective')
import TMIDIX
from GPT2RGAX import *

import matplotlib.pyplot as plt

from torchsummary import summary
from sklearn import metrics

os.chdir('/content/')

"""# (PREP THE MODEL)"""

# Commented out IPython magic to ensure Python compatibility.
#@title Unzip Pre-Trained MIDI Tempo Detective Model
# %cd /content/MIDI-Tempo-Detective/Model

print('=' * 70)
print('Unzipping pre-trained MIDI Tempo Detective model...Please wait...')

!cat MIDI-Tempo-Detective-Trained-Model.zip* > MIDI-Tempo-Detective-Trained-Model.zip
print('=' * 70)

!unzip -j MIDI-Tempo-Detective-Trained-Model.zip
print('=' * 70)

print('Done! Enjoy! :)')
print('=' * 70)
# %cd /content/

#@title LOAD/RELOAD MIDI Tempo Detective Model
print('Loading MIDI Tempo Detective model...')

# Model hyper-parameters; they have to match the checkpoint loaded below.
# NOTE(review): the two positional arguments are presumably the vocabulary
# size (tokens 0..259 are used in this file) and the maximum sequence length;
# confirm against GPT2RGAX.GPTConfig.
config = GPTConfig(260,
                   256,
                   dim_feedforward=256,
                   n_layer=32,
                   n_head=16,
                   n_embd=256,
                   enable_rpr=True,
                   er_len=256)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = GPT(config)

# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only runtime.
state_dict = torch.load('/content/MIDI-Tempo-Detective/Model/MIDI-Tempo-Detective-Trained-Model_16000_steps_0.1938_loss.pth', map_location=device)

# The checkpoint keys carry a 'module.' prefix (presumably because the model
# was saved while wrapped in DataParallel -- TODO confirm). Strip the prefix
# only where it is actually present, so a checkpoint saved without the wrapper
# does not get the first seven characters of every key chopped off.
module_prefix = 'module.'
new_state_dict = OrderedDict()
for k, v in state_dict.items():
    name = k[len(module_prefix):] if k.startswith(module_prefix) else k
    new_state_dict[name] = v

model.load_state_dict(new_state_dict)

model.to(device)

# Switch to inference mode.
model.eval()

print('Done!')

summary(model)

# Pairwise cosine similarity between the rows of the model's `tok_emb` weight
# matrix, rendered as a square heat map and saved as a PNG.
embedding_weights = model.tok_emb.weight.detach().cpu().numpy()
cos_sim = metrics.pairwise.cosine_similarity(embedding_weights)

plt.figure(figsize=(8, 8))
plt.imshow(cos_sim, cmap="inferno", interpolation="none")

# Scale the colorbar with the image's aspect ratio.
n_rows, n_cols = cos_sim.shape
im_ratio = n_rows / n_cols
plt.colorbar(fraction=0.046 * im_ratio, pad=0.04)

plt.ylabel("Position")
plt.xlabel("Position")
plt.tight_layout()
plt.plot()
plt.savefig(
    "/content/MIDI-Tempo-Detective-Positional-Embeddings-Plot.png",
    bbox_inches="tight",
)

"""# (LOAD SOURCE MIDI)"""

#@title Load source MIDI file
full_path_to_MIDI_file = "/content/MIDI-Tempo-Detective/MIDI-Tempo-Detective-Sample-MIDI.mid" #@param {type:"string"}

# Read the file through a context manager so the handle is closed right away.
with open(full_path_to_MIDI_file, 'rb') as midi_file:
    score = TMIDIX.midi2score(midi_file.read())

# score[0] is used below as the ticks value; every following item is a track
# (a list of events). Keep only note and tempo events from all tracks.
events_matrix = [
    event
    for track in score[1:]
    for event in track
    if event[0] == 'note' or event[0] == 'set_tempo'
]

# Order events by their time field (stable, so ties keep track order).
events_matrix.sort(key=lambda x: x[1])

# Keep the LAST 'set_tempo' event and the start times of the notes after it:
# every tempo event discards whatever was collected before it.
tempo_event = None
note_times = []
for e in events_matrix:
    if e[0] == 'set_tempo':
        tempo_event = e
        note_times = []
    else:
        note_times.append(e[1])

# Fail with a clear message instead of an opaque TypeError/IndexError below.
if tempo_event is None:
    raise ValueError('No set_tempo event found in ' + full_path_to_MIDI_file)
if not note_times:
    raise ValueError('No notes found after the last set_tempo event in '
                     + full_path_to_MIDI_file)

# Same layout as before: [tempo event, note time, note time, ...]
tempos = [tempo_event] + note_times

melody_chords_f = []
melody_chords_f.append([score[0], tempos[0][1:], tempos[1:]])

# D = [ticks, [tempo event time, tempo value], [note start times...]]
D = melody_chords_f[0]

# Build the prompt token sequence for the model.
INTS = []
INTS.append(259) # SOS/EOS

# Ticks value as three big-endian bytes; clamped so it always fits.
ticks1, ticks2, ticks3 = min(256*256*255, D[0]).to_bytes(3, 'big')
INTS.extend([ticks1, ticks2, ticks3])

INTS.append(257) # TICKS PAD

# Delta times between consecutive notes (at most 119 deltas), each as two
# big-endian bytes and clamped to the two-byte range.
pe = D[2][0]
for d in D[2][1:120]:
    dtime = min(256*255, d - pe)
    dt1, dt2 = dtime.to_bytes(2, 'big')
    INTS.extend([dt1, dt2])

    pe = d

INTS.extend([258]) # TIMES PAD

# The file's own tempo is NOT added to the prompt; it is only printed so it
# can be compared with what the model detects.
source_tempo = min(256*256*255, D[1][1])
tempo1, tempo2, tempo3 = source_tempo.to_bytes(3, 'big')

print('Source MIDI ticks:', score[0])
print('Source MIDI tempo', source_tempo)
print('Source MIDI tempo (bytes)', tempo1, tempo2, tempo3)

"""# (DETECT)"""

#@title Detect Tempo

print('=' * 70)
print('MIDI Tempo Detective Model Generator')
print('=' * 70)
print('Detecting tempo...Please wait...')
print('=' * 70)

# Sample 24 continuations of the prompt; each one should place three tempo
# bytes right after the TIMES PAD token (258) that ends the prompt.
rand_seq = model.generate_batches(torch.Tensor(INTS),
                                  target_seq_length=256,
                                  temperature=0.8,
                                  num_batches=24,
                                  verbose=True)
out = rand_seq.cpu().tolist()

# Tempo value decoded from each usable sample.
d2 = []

for out1 in out:
    tempo_start = out1.index(258) + 1
    tempo_tokens = out1[tempo_start:tempo_start + 3]

    # Sampling may put a special token (> 255) in the tempo slot; such a
    # sample cannot be decoded as bytes, so skip it instead of letting
    # int.from_bytes raise and abort the whole detection run.
    if len(tempo_tokens) != 3 or not all(0 <= t <= 255 for t in tempo_tokens):
        continue

    d2.append(int.from_bytes(tempo_tokens, 'big'))

print('=' * 70)
if d2:
    print('Average detected tempo:', int(sum(d2) / len(d2)))
    print('=' * 70)
    # Most frequently predicted tempo across the samples.
    print('Best detected tempo', max(set(d2), key = d2.count))
else:
    print('No valid tempo could be decoded from the generated sequences.')
print('=' * 70)

"""# Congrats! You did it! :)"""