sparse_yoda.py

# -*- coding: utf-8 -*-
"""Sparse_Yoda.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/142kqd46WCmqzc7y1EHtNQi51J1mtTzNz

# Sparse Yoda (ver. 1.0)

***

Powered by tegridy-tools: https://github.com/asigalov61/tegridy-tools

***

Credit for the Sparse Transformer implementation used in this colab goes out to @lucidrains https://github.com/lucidrains/sinkhorn-transformer

***

WARNING: This complete implementation is a functioning Artificial Intelligence model. Please exercise great humility, care, and respect. https://www.nscai.gov/

***

#### Project Los Angeles

#### Tegridy Code 2022

***

# (Setup Environment)
"""

#@title nvidia-smi gpu check
# Shows the attached GPU; the model cell later moves the network to CUDA,
# so a GPU runtime is required.
!nvidia-smi

#@title Install all dependencies (run only once per session)

# The Yoda repository is cloned into the current directory; later cells import
# TMIDIX from /content/Yoda and read the default seed MIDI from there.
!git clone https://github.com/asigalov61/Yoda

# Sparse (Sinkhorn) transformer implementation used for the model.
!pip install sinkhorn_transformer

# General-purpose dependencies.
# NOTE(review): pickle5 is a backport of pickle protocol 5 for Python 3.5-3.7;
# presumably it is unnecessary (and may fail to install) on newer runtimes - confirm.
!pip install torch
!pip install tqdm
!pip install pickle5
!pip install matplotlib

# Audio rendering / MIDI inspection dependencies used by the last cell.
!apt install fluidsynth #Pip does not work for some reason. Only apt works
!pip install midi2audio
!pip install pretty_midi

#@title Import all needed modules

print('Loading needed modules. Please wait...')
import os
import copy
import random
import tqdm

# pickle5 is only a backport of pickle protocol 5 for Python 3.5-3.7 and is
# typically not installable on newer interpreters, where the standard-library
# pickle already supports protocol 5. Fall back so the notebook keeps working.
try:
    import pickle5 as pickle
except ImportError:
    import pickle

from sinkhorn_transformer import SinkhornTransformerLM
from sinkhorn_transformer.autoregressive_wrapper import AutoregressiveWrapper

import numpy as np
import torch
import torch.optim as optim
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset

print('Loading TMIDIX module...')
# TMIDIX lives inside the cloned Yoda repository, so import it from there...
os.chdir('/content/Yoda')
import TMIDIX

print('Loading aux modules...')
import matplotlib.pyplot as plt

from midi2audio import FluidSynth
import pretty_midi
import librosa.display
from IPython.display import display, Javascript, HTML, Audio

# ...and return to /content/, which the remaining cells use as the working directory.
os.chdir('/content/')
print('Done! Enjoy! :)')

"""# (MODEL)"""

# Commented out IPython magic to ensure Python compatibility.
#@title Download pre-trained Sparse Yoda model and the training data file
# %cd /content/

# This cell only downloads two files into the current working directory;
# nothing is actually unzipped despite the progress message below.
print('=' * 70)
print('Unzipping pre-trained dataset-model...Please wait...')
print('=' * 70)

# Training data (pickle) that the LOAD cell reads to build the priming dataset.
# NOTE(review): --no-check-certificate disables TLS verification, and the
# downloaded file is later passed to pickle.load - only run this against a
# source you trust.
!wget --no-check-certificate -O 'Sparse-Yoda-Train-Data.pickle' "https://onedrive.live.com/download?cid=8A0D502FC99C608F&resid=8A0D502FC99C608F%2118567&authkey=AGy0sliZlnM0rEk"

print('=' * 70)

# Pre-trained model checkpoint that the "Load/Reload the model" cell passes to torch.load.
!wget --no-check-certificate -O 'Sparse-Yoda-Trained-Model.pth' "https://onedrive.live.com/download?cid=8A0D502FC99C608F&resid=8A0D502FC99C608F%2118566&authkey=APcucQuadwRnHtQ"


print('Done! Enjoy! :)')
print('=' * 70)
# %cd /content/

"""# (LOAD)"""

#@title Load and prep the original training data which will be used to prime the model
full_path_to_original_training_data = "/content/Sparse-Yoda-Train-Data.pickle" #@param {type:"string"}

# NOTE(security): unpickling can execute arbitrary code embedded in the file,
# so only point this at a training-data file obtained from a trusted source.
# The context manager closes the file handle as soon as loading finishes.
with open(full_path_to_original_training_data, 'rb') as training_data_file:
    melody_chords_f = pickle.load(training_data_file)

randomize_dataset = False

print('=' * 70)
print('Prepping INTs dataset...')

if randomize_dataset:
    print('=' * 70)
    print('Randomizing the dataset...')
    random.shuffle(melody_chords_f)
    print('Done!')

print('=' * 70)
print('Processing the dataset...')

r = 0

# Flat stream of integer tokens for the whole dataset
train_data1 = []

# Side lists collected only for events that carry a non-zero start-time
itimes = []
ipitches = []


for chords_list in tqdm.tqdm(melody_chords_f):

    train_data1.append(0) # Intro/Zero Token (one per composition)

    for i in chords_list:

        # Main MIDI note (triple stack): fields 1..3 packed into one INT
        # Main note == [duration / pitch / channel]
        main_note = [i[1] + (i[2] * 16) + (i[3] * 16 * 128)]

        if i[0] != 0: # This is the chordification line
            train_data1.append(i[0]) # start-times
            itimes.append(i[0])
            ipitches.extend(main_note)

        if main_note != [0]: # Main note error control...
            train_data1.extend(main_note)

print('Done!')
print('=' * 70)

print('Total INTs:', len(train_data1))
print('Minimum INT:', min(train_data1))
print('Maximum INT:', max(train_data1))
print('Unique INTs:', len(set(train_data1)))
print('Intro/Zero INTs:', train_data1.count(0))
print('=' * 70)

#@title Load/Reload the model

from collections import OrderedDict

full_path_to_model_checkpoint = "/content/Sparse-Yoda-Trained-Model.pth" #@param {type:"string"}

print('Loading the model...')

# Constants. Only SEQ_LEN is read in this cell; the rest are defined here
# but not used by it.
NUM_BATCHES = int(1e5)
BATCH_SIZE = 4
GRADIENT_ACCUMULATE_EVERY = 4
LEARNING_RATE = 1e-4
VALIDATE_EVERY = 100
GENERATE_EVERY = 500
GENERATE_LENGTH = 512
SEQ_LEN = 4096

# Network hyperparameters. The vocabulary is sized so that the largest token
# present in the priming dataset is representable.
model_hyperparameters = dict(
    num_tokens=max(train_data1) + 1,
    emb_dim=128,
    dim=1024,
    depth=16,
    max_seq_len=SEQ_LEN,
    heads=16,
    bucket_size=128,
    ff_chunks=2,
    causal=True,
    reversible=True,
    attn_dropout=0.1,
    n_local_attn_heads=4,
)

# Build the language model, wrap it for autoregressive sampling, move it to the GPU
model = AutoregressiveWrapper(SinkhornTransformerLM(**model_hyperparameters))
model.cuda()

# Restore the pre-trained weights and switch to inference mode
model.load_state_dict(torch.load(full_path_to_model_checkpoint))
model.eval()

print('Done!')

"""# (GENERATE MUSIC)

# Custom MIDI option
"""

#@title Custom MIDI
full_path_to_custom_MIDI = "/content/Yoda/seed4.mid" #@param {type:"string"}

# Read the MIDI bytes inside a context manager so the file handle is closed
# promptly instead of being left for the garbage collector.
with open(full_path_to_custom_MIDI, 'rb') as custom_midi_file:
    score = TMIDIX.midi2ms_score(custom_midi_file.read())

# Every 'note' and 'patch_change' event from all tracks, in track order
events_matrix = []

# Tracks start at index 1 of the score
# (presumably score[0] holds timing metadata - confirm against TMIDIX)
itrack = 1

# Current MIDI patch (instrument) per channel, updated by 'patch_change' events
patches = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

# Groups of MIDI patches; the position of a group in this list is the
# output channel that matching notes get remapped to.
patch_map = [[0, 1, 2, 3, 4, 5, 6, 7], # Piano 
              [24, 25, 26, 27, 28, 29, 30], # Guitar
              [32, 33, 34, 35, 36, 37, 38, 39], # Bass
              [40, 41], # Violin
              [42, 43], # Cello
              [46], # Harp
              [56, 57, 58, 59, 60], # Trumpet
              [71, 72], # Clarinet
              [73, 74, 75], # Flute
              [-1], # Fake Drums
              [52, 53] # Choir
            ]

while itrack < len(score):
    for event in score[itrack]:
        if event[0] in ('note', 'patch_change'):
            events_matrix.append(event)
    itrack += 1

# Remap every note onto the channel of its instrument group.
# A note event gets its channel's current patch appended as field 6, then its
# channel (field 3) is rewritten to the index of the matching patch_map group.
events_matrix1 = []
for event in events_matrix:
    if event[0] == 'patch_change':
        # Remember which patch is now active on this channel
        patches[event[2]] = event[3]

    elif event[0] == 'note':
        event.append(patches[event[3]])
        matched = False

        for group_index, patch_group in enumerate(patch_map):
            # Drums (channel 9) are never remapped
            if event[3] != 9 and event[6] in patch_group:
                event[3] = group_index
                matched = True

        if event[3] != 9 and not matched:
            # All other instruments/patches go to channel 0,
            # with the velocity raised to at least 80
            event[3] = 0
            event[5] = max(80, event[5])

        # We won't write chans 11-16 for now...
        if event[3] < 11:
            events_matrix1.append(event)

# recalculating timings
# Start-times are scaled down by 16 and durations by 128
# (the generation cell applies the inverse scaling when decoding).

for e in events_matrix1:
    e[1] = int(e[1] / 16)
    e[2] = int(e[2] / 128)

# final processing...

# Defined up front so the token-encoding loop below also works (and yields an
# empty prime) when the MIDI contained no usable notes, instead of raising
# NameError or silently reusing a stale value from a previous run.
melody_chords = []

if events_matrix1:

    # Order by start-time, then by pitch
    events_matrix1.sort(key=lambda x: (x[1], x[4]))

    pe = events_matrix1[0]
    for e in events_matrix1:

        # Delta start-time relative to the previous event, and all other
        # fields, clamped to the ranges the token encoding can represent
        time = max(0, min(255, e[1]-pe[1]))
        dur = max(0, min(15, e[2]))
        cha = max(0, min(15, e[3]))
        ptc = max(0, min(127, e[4]))
        vel = max(0, min(127, e[5]))

        melody_chords.append([time, dur, ptc, cha, vel])

        pe = e

# Convert [time, dur, ptc, cha, vel] events into the flat INT token stream
inputs = []
for i in melody_chords:

    if i[0] != 0: # This is the chordification line
        inputs.append(i[0]) # start-times

    # And this is the main MIDI note line (triple stack)
    main_note = [i[1] + (i[2] * 16) + (i[3] * 16 * 128)] # Main note == [duration / pitch / channel]

    if main_note != [0]: # Main note error control...
        inputs.extend(main_note) # Main note == [duration / pitch / channel]

print('Done!')

"""# Generate"""

#@title Generate
priming_type = "Custom MIDI" #@param ["Custom MIDI", "Random Dataset Point"]
number_of_prime_tokens = 256 #@param {type:"slider", min:32, max:512, step:32}
number_of_tokens_to_generate = 256 #@param {type:"slider", min:64, max:512, step:64}
temperature = 0.8 #@param {type:"slider", min:0.1, max:1, step:0.1}


if priming_type == 'Random Dataset Point':
    # Random dataset point.
    # random.randint includes its upper bound, so cap the start index such
    # that a full window of prime tokens always fits inside the dataset
    # (otherwise the prime could come out truncated or even empty).
    last_valid_start = max(0, len(train_data1) - number_of_prime_tokens)
    r = random.randint(0, last_valid_start)
    out = train_data1[r:r+number_of_prime_tokens]

else:
    # Custom MIDI line
    out = inputs[:number_of_prime_tokens]

# The final composition is the prime followed by the generated continuation
out1 = list(out)

sample = model.generate(torch.LongTensor(out).cuda(), number_of_tokens_to_generate, temperature=temperature)

out2 = sample.cpu().numpy().tolist()
out1.extend(out2)

if out1:

    song = out1
    song_f = []

    # Running decoder state
    time = dur = vel = pitch = channel = 0

    # Velocities for each channel. Channels missing from this table keep
    # whatever velocity was assigned last.
    channel_velocities = {
        0: 60,    # Piano
        1: 70,    # Guitar
        2: 60,    # Bass
        3: 90,    # Violin
        4: 100,   # Cello
        5: 80,    # Harp
        6: 100,   # Trumpet
        7: 100,   # Clarinet
        8: 100,   # Flute
        9: 80,    # Drums
        10: 110,  # Choir
    }

    for token in song:
        if token < 256:
            # Start-time token: advance the clock
            time += token * 16
            continue

        # Note token: unpack the duration / pitch / channel triple stack
        packed_note, dur_code = divmod(token, 16)
        channel, pitch = divmod(packed_note, 128)
        dur = (dur_code * 128) + 128
        vel = channel_velocities.get(channel, vel)

        song_f.append(['note', time, dur, channel, pitch, vel ])

    detailed_stats = TMIDIX.Tegridy_SONG_to_MIDI_Converter(song_f,
                                                        output_signature = 'Sparse Yoda',  
                                                        output_file_name = '/content/Sparse-Yoda-Music-Composition', 
                                                        track_name='Project Los Angeles',
                                                        list_of_MIDI_patches=[0, 24, 32, 40, 42, 46, 56, 71, 73, 0, 53, 0, 0, 0, 0, 0],
                                                        number_of_ticks_per_quarter=500)

    print('Done!')

print('=' * 70)
print('Displaying resulting composition...')
fname = 'Sparse-Yoda-Music-Composition'

# Input MIDI and rendered audio share the same base name
midi_path = fname + '.mid'
wav_path = fname + '.wav'
sample_rate = 16000

# Retrieve piano roll of the MIDI file
pm = pretty_midi.PrettyMIDI(midi_path)
piano_roll = pm.get_piano_roll()

# Plot the piano roll
plt.figure(figsize=(14, 5))
librosa.display.specshow(piano_roll, x_axis='time', y_axis='cqt_note', fmin=1, hop_length=160, sr=sample_rate, cmap=plt.cm.hot)
plt.title(fname)

# Render the MIDI to a WAV file with FluidSynth and hand it to the notebook audio player
synth = FluidSynth("/usr/share/sounds/sf2/FluidR3_GM.sf2", sample_rate)
synth.midi_to_audio(midi_path, wav_path)
Audio(wav_path, rate=sample_rate)

"""# Congrats! You did it! :)"""