1 Star 0 Fork 0


加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
preprocess.py 5.51 KB
一键复制 编辑 原始数据 按行查看 历史
Ji-Sung Linux 提交于 2016-04-13 18:23 . Modularized code
'''
Author: Ji-Sung Kim
Project: deepjazz
Purpose: Parse, clean up, and process data.

Code adapted from Evan Chow's jazzml, https://github.com/evancchow/jazzml, with
express permission.
'''
from __future__ import print_function
from music21 import *
from collections import defaultdict, OrderedDict
from itertools import groupby, izip_longest
from grammar import *
#----------------------------HELPER FUNCTIONS----------------------------------#
def __parse_midi(data_fn):
    """Parse a MIDI file into per-measure melody elements and chord elements.

    The part indices (5 for the melody; 0, 1, 6, 7 for the accompaniment) and
    the offset window (476 to 548) are hard-coded for the single piece this
    module targets.

    Args:
        data_fn: MIDI file location, passed straight to ``converter.parse``.

    Returns:
        A ``(measures, chords)`` pair of ``OrderedDict`` objects. Both are
        keyed by a running index starting at 0. Each value is the list of
        elements that share the same ``int(offset / 4)``; ``measures`` is
        built from the last part of the solo stream (the melody) and
        ``chords`` from its first part.

    Raises:
        AssertionError: If ``chords`` and ``measures`` differ in length after
            the last chord group has been removed.
    """
    # Parse the MIDI data for separate melody and accompaniment parts.
    midi_data = converter.parse(data_fn)

    # Get melody part, compress into single voice.
    melody_stream = midi_data[5]  # For Metheny piece, melody is part #5.
    # The two-name unpacking requires exactly two voices in that part.
    melody1, melody2 = melody_stream.getElementsByClass(stream.Voice)
    for j in melody2:
        melody1.insert(j.offset, j)
    melody_voice = melody1

    # Elements with a zero quarterLength are given a length of 0.25.
    for i in melody_voice:
        if i.quarterLength == 0.0:
            i.quarterLength = 0.25

    # Change key signature to adhere to comp_stream (1 sharp, mode = major).
    # Also add Electric Guitar.
    melody_voice.insert(0, instrument.ElectricGuitar())
    melody_voice.insert(0, key.KeySignature(sharps=1, mode='major'))

    # The accompaniment parts. Take only the best subset of parts from
    # the original data. Maybe add more parts, hand-add valid instruments.
    # Should at least add a string part (for sparse solos).
    # Verified good parts: 0, 1, 6, 7.
    partIndices = [0, 1, 6, 7]
    comp_stream = stream.Voice()
    comp_stream.append([j.flat for i, j in enumerate(midi_data)
                        if i in partIndices])

    # Full stream containing both the melody and the accompaniment.
    # All parts are flattened.
    # NOTE(review): the loop body and the melody append below were missing
    # from the reviewed copy; they are restored so that the accompaniment
    # comes first and the melody last, which is what the solo_stream[0] and
    # solo_stream[-1] lookups further down rely on. Confirm against upstream.
    full_stream = stream.Voice()
    for i in range(len(comp_stream)):
        full_stream.append(comp_stream[i])
    full_stream.append(melody_voice)

    # Extract solo stream, assuming you know the positions ..ByOffset(i, j).
    # Note that for different instruments (with stream.flat), you NEED to use
    # stream.Part(), not stream.Voice().
    # NOTE(review): the original comment gives the accompanied solo range as
    # [478, 548), but the code has always started the window at 476.
    solo_stream = stream.Voice()
    for part in full_stream:
        curr_part = stream.Part()
        # NOTE(review): these four appends are reconstructed. The comment
        # below says group 0 holds "the time signature, metronome, etc." at
        # offset 0.0, so those elements have to be carried into each part.
        # Confirm the exact classes against upstream.
        curr_part.append(part.getElementsByClass(instrument.Instrument))
        curr_part.append(part.getElementsByClass(tempo.MetronomeMark))
        curr_part.append(part.getElementsByClass(key.KeySignature))
        curr_part.append(part.getElementsByClass(meter.TimeSignature))
        # NOTE(review): the call was cut off after "548,"; the keyword
        # argument is presumed to be includeEndBoundary=True -- confirm.
        curr_part.append(part.getElementsByOffset(476, 548,
                                                  includeEndBoundary=True))
        cp = curr_part.flat
        # NOTE(review): restored; without it solo_stream stays empty and the
        # indexing below cannot succeed.
        solo_stream.insert(cp)

    # Group by measure so you can classify.
    # Note that measure 0 is for the time signature, metronome, etc. which have
    # an offset of 0.0.
    melody_stream = solo_stream[-1]
    measures = OrderedDict()
    offsetTuples = [(int(n.offset / 4), n) for n in melody_stream]
    measureNum = 0  # for now, don't use real m. nums (119, 120)
    for key_x, group in groupby(offsetTuples, lambda x: x[0]):
        measures[measureNum] = [n[1] for n in group]
        measureNum += 1

    # Get the stream of chords.
    # offsetTuples_chords: group chords by measure number.
    # NOTE(review): every element of the first part is grouped here, not only
    # chord objects -- confirm that no filtering step was lost.
    chordStream = solo_stream[0]
    offsetTuples_chords = [(int(n.offset / 4), n) for n in chordStream]

    # Generate the chord structure. Use just track 1 (piano) since it is
    # the only instrument that has chords.
    # Group into 4s, just like before.
    chords = OrderedDict()
    measureNum = 0
    for key_x, group in groupby(offsetTuples_chords, lambda x: x[0]):
        chords[measureNum] = [n[1] for n in group]
        measureNum += 1

    # Fix for the below problem.
    #   1) Find out why len(measures) != len(chords).
    #   ANSWER: resolves at end but melody ends 1/16 before last measure so
    #           doesn't actually show up, while the accompaniment's beat 1
    #           right after does.
    #           Actually on second thought: melody/comp start on Ab, and
    #           resolve to the same key (Ab) so could actually just cut out
    #           last measure to loop.
    #           Decided: just cut out the last measure.
    del chords[len(chords) - 1]
    assert len(chords) == len(measures)

    return measures, chords
def __get_abstract_grammars(measures, chords):
    """Build one abstract grammar per measure from melody and chord data.

    Args:
        measures: Mapping from a measure index to the list of melody elements
            in that measure.
        chords: Mapping with the same indices, holding the chord elements of
            each measure.

    Returns:
        A list containing the result of ``parse_melody`` for every index from
        1 up to ``len(measures) - 1``, in ascending order.
    """
    abstract_grammars = []
    # Index 0 is skipped on purpose -- presumably because that group only
    # carries setup elements (time signature, metronome, ...); confirm
    # against the producer of `measures`.
    for ix in range(1, len(measures)):
        # Rebuild a voice for the melody and one for the chords, keeping each
        # element at its own offset.
        m = stream.Voice()
        for i in measures[ix]:
            m.insert(i.offset, i)
        c = stream.Voice()
        for j in chords[ix]:
            c.insert(j.offset, j)
        # The parsed result has to be collected; previously it was computed
        # and discarded, so the function always returned an empty list.
        abstract_grammars.append(parse_melody(m, c))
    return abstract_grammars
#----------------------------PUBLIC FUNCTIONS----------------------------------#
def get_musical_data(data_fn):
    """Get musical data from a MIDI file.

    Args:
        data_fn: MIDI file location, forwarded to ``__parse_midi``.

    Returns:
        A ``(chords, abstract_grammars)`` tuple: the chord mapping returned by
        ``__parse_midi`` and the list that ``__get_abstract_grammars`` builds
        from the parsed measures and chords.
    """
    parsed_measures, parsed_chords = __parse_midi(data_fn)
    grammars = __get_abstract_grammars(parsed_measures, parsed_chords)
    return parsed_chords, grammars
def get_corpus_data(abstract_grammars):
    """Get corpus data from grammatical data.

    Args:
        abstract_grammars: Iterable of strings; each string is cut into
            tokens at every single space character.

    Returns:
        A ``(corpus, values, val_indices, indices_val)`` tuple:
        ``corpus`` is the flat list of all tokens in input order, ``values``
        is the set of distinct tokens, ``val_indices`` maps each token to an
        integer index, and ``indices_val`` is the inverse mapping.
    """
    # Flatten every grammar string into one token list.
    corpus = []
    for grammar in abstract_grammars:
        for token in grammar.split(' '):
            corpus.append(token)

    values = set(corpus)

    # Number the distinct tokens once and fill both lookup directions from
    # that single pass over the set.
    val_indices = {}
    indices_val = {}
    for index, token in enumerate(values):
        val_indices[token] = index
        indices_val[index] = token

    return corpus, values, val_indices, indices_val
马建仓 AI 助手
