1 Star 4 Fork 0

garlong/GiantMIDI-Piano

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
calculate_statistics.py 41.38 KB
一键复制 编辑 原始数据 按行查看 历史
DELL 提交于 2021-04-13 14:54 . update
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050
import os
import sys
import argparse
import pickle
import numpy as np
import matplotlib.pyplot as plt
from utilities import read_midi, TargetProcessor
from dataset import read_csv_to_meta_dict
# Frame rate and piano range shared by the statistics functions below.
frames_per_second = 100
begin_note = 21     # MIDI note number that maps to piano key index 0 (A0)
classes_num = 88    # Number of piano keys

# Pitch-class spellings (mathtext), indexed by MIDI note number modulo 12.
_PITCH_CLASSES = (
    'C', r'C{\sharp}', 'D', r'D{\sharp}', 'E', 'F',
    r'F{\sharp}', 'G', r'G{\sharp}', 'A', r'A{\sharp}', 'B')


def _mathtext_label(pitch_class, octave=None):
    """Return the matplotlib mathtext label of a pitch class.

    Every C is typeset in bold, all other pitch classes in the regular font.
    When `octave` is given it is appended as a subscript.
    """
    if octave is None:
        body = pitch_class
    else:
        body = '{}_{{{}}}'.format(pitch_class, octave)

    font = r'\bf' if pitch_class == 'C' else r'\mathregular'
    return '${}{{{}}}$'.format(font, body)


# Tick labels of the 88 piano keys, from A0 (index 0) to C8 (index 87).
note_names = [
    _mathtext_label(_PITCH_CLASSES[midi_note % 12], midi_note // 12 - 1)
    for midi_note in range(begin_note, begin_note + classes_num)]

# Tick labels of the 12 pitch classes, from C to B.
chroma_names = [_mathtext_label(pitch_class) for pitch_class in _PITCH_CLASSES]
def meta_info(args):
    """Calculate statistics of number of music pieces, nationalities, birth,
    etc.

    Reads the metadata csv in the workspace, keeps the rows whose
    'piano_solo_prob' is at least 0.5, prints the number of pieces and
    composers, the composers per nationality, per birth century and per
    lifespan, plots the nationalities to 'results/nationalities.pdf' and dumps
    the composers and their numbers of piano works to 'statistics.pkl' in the
    workspace.

    Args:
      args: namespace with attribute `workspace`, str, directory containing
        'full_music_pieces_youtube_similarity_pianosoloprob.csv'
    """

    def _count_descending(values):
        """Return the unique values and their counts, most frequent first."""
        uniques, counts = np.unique(values, return_counts=True)
        order = np.argsort(counts)[::-1]
        return uniques[order], counts[order]

    # Arguments & parameters
    workspace = args.workspace

    # Paths
    csv_path = os.path.join(workspace, 'full_music_pieces_youtube_similarity_pianosoloprob.csv')
    statistics_path = os.path.join(workspace, 'statistics.pkl')
    fig_path = 'results/nationalities.pdf'
    os.makedirs(os.path.dirname(statistics_path), exist_ok=True)
    # The figure directory must exist as well, otherwise savefig() fails.
    os.makedirs(os.path.dirname(fig_path), exist_ok=True)

    meta_dict = read_csv_to_meta_dict(csv_path)
    # keys: ['surname', 'firstname', 'music', 'nationality', 'birth', 'death',
    # 'youtube_title', 'youtube_id', 'similarity', 'piano_solo_prob', 'audio_name']

    for key in meta_dict.keys():
        meta_dict[key] = np.array(meta_dict[key])

    # Filter piano solo
    indexes = np.where(meta_dict['piano_solo_prob'].astype(np.float32) >= 0.5)[0]
    print('Music pieces num: {}'.format(len(indexes)))

    # Collect the works and the biographical fields of every composer
    works_dict = {}
    for idx in indexes:
        composer = '{}, {}'.format(meta_dict['surname'][idx], meta_dict['firstname'][idx])
        entry = works_dict.setdefault(composer, {'audio_names': []})
        entry['audio_names'].append(meta_dict['audio_name'][idx])
        # As before, the last row of a composer wins for these fields.
        entry['nationality'] = meta_dict['nationality'][idx]
        entry['birth'] = meta_dict['birth'][idx]
        entry['death'] = meta_dict['death'][idx]

    # Sorted so that the dumped statistics are identical between runs
    # (a plain set() ordering depends on string hashing).
    composers = np.array(sorted(works_dict.keys()))
    print('Composers num: {}'.format(len(composers)))

    number_of_works = np.array([len(works_dict[composer]['audio_names']) for composer in composers])
    statistics_dict = {'composers': composers, 'number_of_piano_works': number_of_works}

    # Sort by number of works
    sorted_idx = np.argsort(number_of_works)[::-1]

    sorted_list = []
    for idx in sorted_idx:
        composer = composers[idx]
        sorted_list.append([composer, len(works_dict[composer]['audio_names']),
            works_dict[composer]['nationality'], works_dict[composer]['birth'],
            works_dict[composer]['death']])
    # E.g., [..., ['Schmitt, Florent', 132, 'French', '1870', '1958'], ...]

    # Count by nationalities
    nationalities = [e[2] for e in sorted_list]
    unique_nationalities, nationalities_count = _count_descending(nationalities)
    print('-------- Nationalities --------')
    print('Nationalities:', unique_nationalities)
    print('Count:', nationalities_count)

    # Plot nationalities
    # NOTE(review): the most frequent entry is left out of the plot;
    # presumably it is the 'unknown' nationality -- confirm against the csv.
    N = len(nationalities_count)
    fig, ax = plt.subplots(1, 1, figsize=(8, 4))
    ax.set_xlim(-1, N - 1)
    ax.set_ylim(0, 200)
    ax.set_xlabel('Nationalities', fontsize=14)
    ax.set_ylabel('Number of composers', fontsize=14)
    ax.bar(np.arange(N - 1), nationalities_count[1 : N], align='center', color='C0', alpha=1)
    ax.xaxis.set_ticks(np.arange(N - 1))
    ax.xaxis.set_ticklabels(unique_nationalities[1 : N], rotation=90, fontsize=12)
    ax.yaxis.grid(color='k', linestyle='--', linewidth=0.3)  # only horizontal grid
    plt.tight_layout()
    plt.savefig(fig_path)
    print('Save fig to {}'.format(fig_path))

    # Birth century (e.g., 18 for a composer born in 1870)
    births = [int(e[3]) // 100 for e in sorted_list if e[3] != 'unknown']
    unique_births, births_count = _count_descending(births)
    print('-------- Birth centery --------')
    print('Birth centuries:', unique_births)
    print('Count:', births_count)

    # Lifespan. Both years are needed, so composers with an unknown death
    # year are skipped too instead of crashing int().
    lifespan = [int(e[4]) - int(e[3]) for e in sorted_list
        if e[3] != 'unknown' and e[4] != 'unknown']
    # np.unique returns the lifespans in ascending order.
    unique_lifespan, lifespan_count = np.unique(lifespan, return_counts=True)
    print('-------- Lifespan --------')
    print('Life span (years):', unique_lifespan)
    print('Count:', lifespan_count)

    # Dump statistics to disk
    with open(statistics_path, 'wb') as f:
        pickle.dump(statistics_dict, f)
    print('Save to {}'.format(statistics_path))
def plot_composer_works_num(args):
    """Plot the number of works of the composers with the most piano works.

    For up to 100 composers, ranked by their number of piano solo works, two
    overlaid bars are drawn: all of their works with downloaded audio
    ('Full works') and their piano solo works ('Piano works'). The figure is
    saved to 'results/composer_works_num.pdf'.

    Args:
      args: namespace with attribute `workspace`, str, directory containing
        'full_music_pieces_youtube_similarity_pianosoloprob.csv'
    """

    def _get_composer_works_num(meta_dict, indexes, composers):
        """Get the number of works of composers.

        Args:
          meta_dict, dict, keys: ['surname', 'firstname', 'music', 'nationality',
            'birth', 'death', 'youtube_title', 'youtube_id', 'similarity',
            'piano_solo_prob', 'audio_name']
          indexes: 1darray, e.g., [0, 2, 5, 6, ...]
          composers: list

        Returns:
          number_of_works: (composers_num,)
          sorted_indexes: (composers_num,), descending number of works
        """
        works_dict = {composer: 0 for composer in composers}

        for idx in indexes:
            composer = '{}, {}'.format(meta_dict['surname'][idx], meta_dict['firstname'][idx])
            # Dict membership instead of scanning the composers list.
            if composer in works_dict:
                works_dict[composer] += 1

        number_of_works = np.array([works_dict[composer] for composer in composers])

        # Sort by number of works
        sorted_indexes = np.argsort(number_of_works)[::-1]

        return number_of_works, sorted_indexes

    # Arguments & parameters
    workspace = args.workspace

    # Paths
    csv_path = os.path.join(workspace, 'full_music_pieces_youtube_similarity_pianosoloprob.csv')
    fig_path = 'results/composer_works_num.pdf'
    os.makedirs(os.path.dirname(fig_path), exist_ok=True)

    meta_dict = read_csv_to_meta_dict(csv_path)
    # keys: ['surname', 'firstname', 'music', 'nationality', 'birth', 'death',
    # 'youtube_title', 'youtube_id', 'similarity', 'piano_solo_prob', 'audio_name']

    for key in meta_dict.keys():
        meta_dict[key] = np.array(meta_dict[key])

    # Filter by indexes, larger 1e-6 indicates audio has been downloaded,
    # larger than 0.5 indicates piano solo
    piano_solo_prob = meta_dict['piano_solo_prob'].astype(np.float32)
    all_indexes = np.where(piano_solo_prob >= 1e-6)[0]
    piano_indexes = np.where(piano_solo_prob >= 0.5)[0]

    # Get composer names (sorted so that ties are ordered reproducibly)
    piano_composers = sorted({
        '{}, {}'.format(meta_dict['surname'][idx], meta_dict['firstname'][idx])
        for idx in piano_indexes})

    # Get composer works number; both are ranked by the piano works
    composer_works_num_full, _ = _get_composer_works_num(meta_dict, all_indexes, piano_composers)
    composer_works_num_piano, sorted_indexes = _get_composer_works_num(meta_dict, piano_indexes, piano_composers)

    # Plot. Never ask for more bars than there are composers.
    top_composers = min(100, len(piano_composers))
    top_indexes = sorted_indexes[0 : top_composers]

    fig, ax = plt.subplots(1, 1, figsize=(20, 6))
    ax.set_xlim(-1, top_composers)
    ax.set_ylim(0, 300)
    ax.set_ylabel('Number of works', fontsize=15)
    line1 = ax.bar(np.arange(top_composers), np.array(composer_works_num_full)[top_indexes],
        align='center', color='pink', alpha=0.5, label='Full works')
    line2 = ax.bar(np.arange(top_composers), np.array(composer_works_num_piano)[top_indexes],
        align='center', color='C0', alpha=1, label='Piano works')
    ax.xaxis.set_ticks(np.arange(top_composers))
    ax.xaxis.set_ticklabels(np.array(piano_composers)[top_indexes], rotation=90, fontsize=13)
    ax.tick_params(axis="y", labelsize=13)
    ax.yaxis.grid(color='k', linestyle='--', linewidth=0.3)
    ax.legend(handles=[line1, line2], fontsize=15, loc=1, framealpha=1.)
    # The paddings are keyword-only in current Matplotlib releases.
    plt.tight_layout(pad=0, h_pad=0, w_pad=0)
    plt.savefig(fig_path)
    print('Save fig to {}'.format(fig_path))
def plot_composer_durations(args):
    """Plot the total audio duration of the composers with the longest piano
    recordings.

    For up to 100 composers, ranked by the total duration of their piano solo
    works, two overlaid bars are drawn: all of their works with downloaded
    audio ('Full works') and their piano solo works ('Piano works'). The
    figure is saved to 'results/composer_durations.pdf'.

    Args:
      args: namespace with attribute `workspace`, str, directory containing
        'full_music_pieces_youtube_similarity_pianosoloprob.csv'
    """

    def _get_composer_durations(meta_dict, indexes, composers):
        """Get the total audio duration of composers.

        Args:
          meta_dict, dict, keys: ['surname', 'firstname', 'music', 'nationality',
            'birth', 'death', 'youtube_title', 'youtube_id', 'similarity',
            'piano_solo_prob', 'audio_name', 'audio_duration']
          indexes: 1darray, e.g., [0, 2, 5, 6, ...]
          composers: list

        Returns:
          durations: (composers_num,), sum of 'audio_duration' divided by 3600
            (hours, assuming 'audio_duration' is in seconds -- TODO confirm)
          sorted_indexes: (composers_num,), descending duration
        """
        durations_dict = {composer: 0 for composer in composers}

        for idx in indexes:
            composer = '{}, {}'.format(meta_dict['surname'][idx], meta_dict['firstname'][idx])
            # Dict membership instead of scanning the composers list.
            if composer in durations_dict:
                durations_dict[composer] += float(meta_dict['audio_duration'][idx]) / 3600

        durations = np.array([durations_dict[composer] for composer in composers])

        # Sort by duration
        sorted_indexes = np.argsort(durations)[::-1]

        return durations, sorted_indexes

    # Arguments & parameters
    workspace = args.workspace

    # Paths
    csv_path = os.path.join(workspace, 'full_music_pieces_youtube_similarity_pianosoloprob.csv')
    fig_path = 'results/composer_durations.pdf'
    os.makedirs(os.path.dirname(fig_path), exist_ok=True)

    meta_dict = read_csv_to_meta_dict(csv_path)
    # keys: ['surname', 'firstname', 'music', 'nationality', 'birth', 'death',
    # 'youtube_title', 'youtube_id', 'similarity', 'piano_solo_prob',
    # 'audio_name', 'audio_duration']

    for key in meta_dict.keys():
        meta_dict[key] = np.array(meta_dict[key])

    # Filter by indexes, larger 1e-6 indicates audio has been downloaded,
    # larger than 0.5 indicates piano solo
    piano_solo_prob = meta_dict['piano_solo_prob'].astype(np.float32)
    all_indexes = np.where(piano_solo_prob >= 1e-6)[0]
    piano_indexes = np.where(piano_solo_prob >= 0.5)[0]

    # Get composer names (sorted so that ties are ordered reproducibly)
    piano_composers = sorted({
        '{}, {}'.format(meta_dict['surname'][idx], meta_dict['firstname'][idx])
        for idx in piano_indexes})

    # Get composer durations; both are ranked by the piano durations
    composer_durations_full, _ = _get_composer_durations(meta_dict, all_indexes, piano_composers)
    composer_durations_piano, sorted_indexes = _get_composer_durations(meta_dict, piano_indexes, piano_composers)

    # Plot. Never ask for more bars than there are composers.
    N = min(100, len(piano_composers))
    top_indexes = sorted_indexes[0 : N]

    fig, ax = plt.subplots(1, 1, figsize=(20, 6))
    ax.set_xlim(-1, N)
    ax.set_ylim(0, 40)
    ax.set_ylabel('Durations (h)', fontsize=15)
    line1 = ax.bar(np.arange(N), np.array(composer_durations_full)[top_indexes],
        align='center', color='pink', alpha=0.5, label='Full works')
    line2 = ax.bar(np.arange(N), np.array(composer_durations_piano)[top_indexes],
        align='center', color='C0', alpha=1, label='Piano works')
    ax.xaxis.set_ticks(np.arange(N))
    ax.xaxis.set_ticklabels(np.array(piano_composers)[top_indexes], rotation=90, fontsize=13)
    ax.tick_params(axis="y", labelsize=13)
    ax.yaxis.grid(color='k', linestyle='--', linewidth=0.3)
    ax.legend(handles=[line1, line2], fontsize=15, loc=1, framealpha=1.)
    # The paddings are keyword-only in current Matplotlib releases.
    plt.tight_layout(pad=0, h_pad=0, w_pad=0)
    plt.savefig(fig_path)
    print('Save fig to {}'.format(fig_path))
def calculate_music_events_from_midi(args):
    """Extract the note and pedal events of all MIDI files of the workspace.

    Every file in '<workspace>/midis' is read with read_midi() and converted
    with TargetProcessor.process(). The results are collected in a dict keyed
    by the file name without extension and dumped to
    '<workspace>/all_music_events.pkl'.

    Args:
      args: namespace with attribute `workspace`, str
    """
    # Arguments & parameters
    workspace = args.workspace

    # Paths
    midis_dir = os.path.join(workspace, "midis")
    all_music_events_path = os.path.join(workspace, 'all_music_events.pkl')

    # NOTE(review): this label is only used in the log line below; the files
    # are not filtered by composer, so the printed number covers every file
    # in midis_dir.
    composer = 'Beethoven'

    midi_names = sorted(os.listdir(midis_dir))
    print('{} num: {}'.format(composer, len(midi_names)))

    all_music_events_dict = {}

    for n, midi_name in enumerate(midi_names):
        print(n, midi_name)
        midi_path = os.path.join(midis_dir, midi_name)
        midi_dict = read_midi(midi_path)

        # The whole piece is processed as one segment that ends at the time
        # of the last MIDI event.
        segment_seconds = midi_dict['midi_event_time'][-1]

        target_processor = TargetProcessor(segment_seconds, frames_per_second,
            begin_note, classes_num)

        (note_events, pedal_events) = target_processor.process(
            0, midi_dict['midi_event_time'], midi_dict['midi_event'])

        bare_name = os.path.splitext(midi_name)[0]
        all_music_events_dict[bare_name] = {
            'note_events': note_events,
            'pedal_events': pedal_events,
            'segment_seconds': segment_seconds}

    # Close the file deterministically so the pickle is fully flushed.
    with open(all_music_events_path, 'wb') as f:
        pickle.dump(all_music_events_dict, f)
    print('Dump to {}'.format(all_music_events_path))
def plot_note_histogram(args):
    """Plot the histogram of the notes of all music pieces over the piano keys.

    Loads '<workspace>/all_music_events.pkl', collects the piano key index
    (MIDI note minus begin_note) of every note event and saves the bar plot of
    their counts to 'results/note_histogram.pdf'.

    Args:
      args: namespace with attribute `workspace`, str
    """
    # Arguments & parameters
    workspace = args.workspace

    # Paths
    all_music_events_path = os.path.join(workspace, 'all_music_events.pkl')
    fig_path = 'results/note_histogram.pdf'
    os.makedirs(os.path.dirname(fig_path), exist_ok=True)

    # Load events
    with open(all_music_events_path, 'rb') as f:
        all_music_events = pickle.load(f)
    print('Load finish.')

    all_piano_notes = []
    for name in sorted(all_music_events.keys()):
        note_events = all_music_events[name]['note_events']
        all_piano_notes.extend(
            note_event['midi_note'] - begin_note for note_event in note_events)

    # count_notes is defined outside this block; presumably it returns the
    # number of notes per piano key, (classes_num,) -- TODO confirm.
    counts = count_notes(all_piano_notes)

    fig, ax = plt.subplots(1, 1, figsize=(20, 3))
    ax.bar(np.arange(classes_num), counts, align='center')
    ax.xaxis.set_ticks(np.arange(classes_num))
    ax.xaxis.set_ticklabels(note_names, fontsize=10)
    ax.yaxis.set_ticks([0, 2e5, 4e5, 6e5, 8e5, 1e6, 1.2e6])
    ax.yaxis.set_ticklabels(['0', '200,000', '400,000', '600,000', '800,000',
        '1,000,000', '1,200,000'], fontsize=10)
    ax.set_xlim(-1, classes_num)
    ax.set_ylabel('Number of notes', fontsize=15)
    plt.tight_layout()
    plt.savefig(fig_path)
    print('Save to {}'.format(fig_path))

    # NOTE(review): this prints whatever count_notes returned (the per-key
    # counts), not a single total.
    print('Total notes: {}'.format(counts))
def plot_mean_std_notes(args):
    """Plot the mean and standard deviation of the notes of the composers with
    the most piano works.

    Up to 100 composers are taken from '<workspace>/statistics.pkl' by
    descending number of piano works. For each of them the mean and standard
    deviation of the piano key indexes of all their notes are calculated from
    '<workspace>/all_music_events.pkl'. The composers are plotted by ascending
    mean to 'results/note_mean_std.pdf'.

    Args:
      args: namespace with attribute `workspace`, str
    """
    # Arguments & parameters
    workspace = args.workspace

    # Paths
    all_music_events_path = os.path.join(workspace, 'all_music_events.pkl')
    meta_path = os.path.join(workspace, 'statistics.pkl')
    fig_path = 'results/note_mean_std.pdf'
    os.makedirs(os.path.dirname(fig_path), exist_ok=True)

    # Load meta
    with open(meta_path, 'rb') as f:
        statistics_dict = pickle.load(f)

    sorted_idxes = np.argsort(statistics_dict['number_of_piano_works'])[::-1]
    composers = statistics_dict['composers'][sorted_idxes[0 : 100]]

    # Load events
    with open(all_music_events_path, 'rb') as f:
        all_music_events = pickle.load(f)
    print('Load finish.')

    all_names = sorted(all_music_events.keys())
    key_indexes = np.arange(classes_num)

    # Calculate the mean and standard deviation of notes of all composers
    means = []
    stds = []
    for composer in composers:
        # A piece belongs to a composer when the first two ', ' separated
        # fields of its name are the surname and the firstname. Comparing the
        # slices also tolerates names with fewer than two fields.
        composer_key = composer.split(', ')[0 : 2]
        names = [name for name in all_names if name.split(', ')[0 : 2] == composer_key]

        all_piano_notes = []
        for name in names:
            note_events = all_music_events[name]['note_events']
            all_piano_notes.extend(
                note_event['midi_note'] - begin_note for note_event in note_events)

        # count_notes is defined outside this block; presumably it returns
        # the number of notes per piano key, (classes_num,) -- TODO confirm.
        counts = count_notes(all_piano_notes)
        mean_ = np.sum(counts * key_indexes) / np.sum(counts)
        std_ = np.sqrt(np.sum((key_indexes - mean_) ** 2 * counts) / np.sum(counts))
        means.append(mean_)
        stds.append(std_)

    # Sort by ascending order of the mean
    mean_order = np.argsort(means)
    mean_array = np.array(means)[mean_order]
    std_array = np.array(stds)[mean_order]

    # Plot. The number of plotted composers follows the data, so fewer than
    # 100 composers do not cause a shape mismatch.
    top_composers = len(composers)
    fig, ax = plt.subplots(1, 1, figsize=(20, 6))
    # std_array is already in plotting order; it must not be permuted again.
    ax.plot(std_array, c='r')
    ax.stem(mean_array)
    ax.fill_between(np.arange(top_composers), mean_array - std_array / 2, mean_array + std_array / 2, alpha=0.3)
    # Reference line at piano key 39, which is labelled C4 below.
    ax.plot(39 * np.ones(top_composers), linestyle='--', c='k')
    ax.xaxis.set_ticks(np.arange(top_composers))
    ax.xaxis.set_ticklabels(composers[mean_order], rotation=90, fontsize=13)
    ax.yaxis.set_ticks([15, 27, 39, 51, 63])
    ax.yaxis.set_ticklabels(['$C_{2}$', '$C_{3}$', '$C_{4}$', '$C_{5}$', '$C_{6}$'], fontsize=13)
    ax.set_ylabel('Note names', fontsize=15)
    ax.set_xlim(-1, top_composers)
    ax.set_ylim((20, 60))
    plt.tight_layout()
    plt.savefig(fig_path)
    print('Save to {}'.format(fig_path))
def mean_of_histogram(x):
    """Return the mean bin index of a histogram.

    Args:
      x: (bins_num,), count of each bin; bin n stands for the value n

    Returns:
      mean: float, sum of n * x[n] divided by the sum of x
    """
    bin_indexes = np.arange(len(x))
    weighted_total = np.sum(x * bin_indexes)
    total = np.sum(x)
    return weighted_total / total
def std_of_histogram(x):
    """Return the standard deviation of the bin indexes of a histogram.

    Args:
      x: (bins_num,), count of each bin; bin n stands for the value n

    Returns:
      std: float, square root of the count-weighted mean of the squared
        distances between the bin indexes and mean_of_histogram(x)
    """
    centre = mean_of_histogram(x)
    offsets = np.arange(len(x)) - centre
    variance = np.sum(offsets ** 2 * x) / np.sum(x)
    return np.sqrt(variance)
def plot_notes_per_second_mean_std(args):
    """Plot the mean and standard deviation of the number of notes per second
    of the composers with the most piano works.

    Up to 100 composers are taken from '<workspace>/statistics.pkl' by
    descending number of piano works. For each of them a histogram of the
    number of note onsets per second is accumulated over all of their pieces
    in '<workspace>/all_music_events.pkl'. Only seconds that contain at least
    one onset are counted. The composers are plotted by ascending mean to
    'results/notes_per_second_mean_std.pdf'.

    Args:
      args: namespace with attribute `workspace`, str
    """
    # Arguments & parameters
    workspace = args.workspace

    # Paths
    all_music_events_path = os.path.join(workspace, 'all_music_events.pkl')
    meta_path = os.path.join(workspace, 'statistics.pkl')
    fig_path = 'results/notes_per_second_mean_std.pdf'
    os.makedirs(os.path.dirname(fig_path), exist_ok=True)

    # Load meta
    with open(meta_path, 'rb') as f:
        statistics_dict = pickle.load(f)

    sorted_idxes = np.argsort(statistics_dict['number_of_piano_works'])[::-1]
    composers = statistics_dict['composers'][sorted_idxes[0 : 100]]

    # Load events
    with open(all_music_events_path, 'rb') as f:
        all_music_events = pickle.load(f)
    print('Load finish.')

    all_names = sorted(all_music_events.keys())

    # Calculate the mean and standard deviation of notes per second of all
    # composers
    means = []
    stds = []
    for composer in composers:
        # A piece belongs to a composer when the first two ', ' separated
        # fields of its name are the surname and the firstname. Comparing the
        # slices also tolerates names with fewer than two fields.
        composer_key = composer.split(', ')[0 : 2]
        names = [name for name in all_names if name.split(', ')[0 : 2] == composer_key]

        # notes_per_second[k] is the number of seconds containing k onsets.
        notes_per_second = np.zeros(100)

        for name in names:
            note_events = all_music_events[name]['note_events']
            onset_times = np.array([note_event['onset_time'] for note_event in note_events])

            # Calculate the number of notes in 1 second
            seconds = onset_times.astype(np.int32)
            _, counts = np.unique(seconds, return_counts=True)

            # Histogram of the per-second counts. The histogram grows when a
            # second holds 100 or more onsets, instead of indexing out of
            # bounds.
            piece_hist = np.bincount(counts, minlength=len(notes_per_second))
            extra_bins = len(piece_hist) - len(notes_per_second)
            if extra_bins > 0:
                notes_per_second = np.concatenate((notes_per_second, np.zeros(extra_bins)))
            notes_per_second += piece_hist

        means.append(mean_of_histogram(notes_per_second))
        stds.append(std_of_histogram(notes_per_second))

    # Sort by ascending order of the mean
    mean_order = np.argsort(means)
    mean_array = np.array(means)[mean_order]
    std_array = np.array(stds)[mean_order]

    # Plot. The number of plotted composers follows the data, so fewer than
    # 100 composers do not cause a shape mismatch.
    top_composers = len(composers)
    fig, ax = plt.subplots(1, 1, figsize=(20, 6))
    ax.stem(mean_array)
    ax.fill_between(np.arange(top_composers), mean_array - std_array / 2, mean_array + std_array / 2, alpha=0.3)
    ax.xaxis.set_ticks(np.arange(top_composers))
    ax.xaxis.set_ticklabels(composers[mean_order], rotation=90, fontsize=13)
    ax.yaxis.set_ticks([0, 5, 10, 15, 20])
    ax.yaxis.set_ticklabels([0, 5, 10, 15, 20], fontsize=13)
    ax.set_ylabel('Number of notes per second', fontsize=15)
    ax.set_xlim(-1, top_composers)
    ax.set_ylim((0, 20))
    plt.tight_layout()
    plt.savefig(fig_path)
    print('Save to {}'.format(fig_path))
def plot_selected_composers_note_histogram(args):
    """Plot the note histograms of three selected composers.

    For Bach, Beethoven and Liszt the piano key indexes (MIDI note minus
    begin_note) of all notes of their pieces in
    '<workspace>/all_music_events.pkl' are counted and drawn as one bar plot
    per composer. The figure is saved to
    'results/selected_composers_note_histogram.pdf'.

    Args:
      args: namespace with attribute `workspace`, str
    """
    # Arguments & parameters
    workspace = args.workspace

    # Paths
    all_music_events_path = os.path.join(workspace, 'all_music_events.pkl')
    fig_path = 'results/selected_composers_note_histogram.pdf'
    os.makedirs(os.path.dirname(fig_path), exist_ok=True)

    # The composers are fixed, so statistics.pkl is not needed here.
    composers = ['Bach, Johann Sebastian', 'Beethoven, Ludwig van', 'Liszt, Franz']

    # Load events
    with open(all_music_events_path, 'rb') as f:
        all_music_events = pickle.load(f)
    print('Load finish.')

    # All music piece names
    all_names = sorted(all_music_events.keys())

    fig, axs = plt.subplots(len(composers), 1, sharex=False, figsize=(20, 6))

    for ax, composer in zip(axs, composers):
        # Select the music pieces of a specific composer: the first two ', '
        # separated fields of a piece name are the surname and the firstname.
        # Comparing the slices also tolerates names with fewer than two fields.
        composer_key = composer.split(', ')[0 : 2]
        names = [name for name in all_names if name.split(', ')[0 : 2] == composer_key]

        # Collect all notes of a specific composer
        all_piano_notes = []
        for name in names:
            note_events = all_music_events[name]['note_events']
            all_piano_notes.extend(
                note_event['midi_note'] - begin_note for note_event in note_events)

        # count_notes is defined outside this block; presumably it returns
        # the number of notes per piano key, (classes_num,) -- TODO confirm.
        counts = count_notes(all_piano_notes)

        ax.set_title(composer)
        ax.set_ylabel('Number of notes', fontsize=12)
        ax.set_xlim(-1, classes_num)
        ax.bar(np.arange(classes_num), counts, align='center')
        ax.xaxis.set_ticks(np.arange(classes_num))
        ax.xaxis.set_ticklabels(note_names, fontsize=10)
        ax.yaxis.set_ticks([0, 1e4, 2e4, 3e4])
        ax.yaxis.set_ticklabels(['0', '10,000', '20,000', '30,000'], fontsize=12)

    plt.tight_layout()
    plt.savefig(fig_path)
    print('Save fig to {}'.format(fig_path))
def plot_selected_composers_chroma(args):
    """Plot the chroma (pitch class) distribution of six selected composers.

    For each composer the notes of all their pieces in
    '<workspace>/all_music_events.pkl' are folded to the 12 pitch classes and
    normalized to frequencies. The 2 x 3 grid of bar plots is saved to
    'results/selected_composers_chroma.pdf'.

    Args:
      args: namespace with attribute `workspace`, str
    """

    def _count_chroma(piano_notes):
        """Count the notes of each pitch class.

        Args:
          piano_notes: list of piano key indexes, 0 is the lowest key (A0)

        Returns:
          counts: (12,), ordered from C to B
        """
        notes = np.asarray(piano_notes, dtype=np.int64)
        # Only the notes on the piano keyboard are counted.
        notes = notes[(notes >= 0) & (notes < classes_num)]
        # Key 0 is an A, so key 3 is the first C.
        return np.bincount((notes - 3) % 12, minlength=12).astype(np.float64)

    # Arguments & parameters
    workspace = args.workspace

    # Paths
    all_music_events_path = os.path.join(workspace, 'all_music_events.pkl')
    fig_path = 'results/selected_composers_chroma.pdf'
    os.makedirs(os.path.dirname(fig_path), exist_ok=True)

    # The composers are fixed, so statistics.pkl is not needed here.
    composers = ['Bach, Johann Sebastian', 'Mozart, Wolfgang Amadeus',
        'Beethoven, Ludwig van', 'Chopin, Frédéric', 'Liszt, Franz', 'Debussy, Claude']

    # Load events
    with open(all_music_events_path, 'rb') as f:
        all_music_events = pickle.load(f)
    print('Load finish.')

    # All music piece names
    all_names = sorted(all_music_events.keys())

    chroma_num = 12
    fig, axs = plt.subplots(2, 3, sharex=False, figsize=(10, 4))

    # axs.flat walks the grid row by row, i.e. composer j is drawn at
    # row j // 3, column j % 3.
    for ax, composer in zip(axs.flat, composers):
        # Select the music pieces of a specific composer: the first two ', '
        # separated fields of a piece name are the surname and the firstname.
        # Comparing the slices also tolerates names with fewer than two fields.
        composer_key = composer.split(', ')[0 : 2]
        names = [name for name in all_names if name.split(', ')[0 : 2] == composer_key]

        # Collect all notes of a specific composer
        all_piano_notes = []
        for name in names:
            note_events = all_music_events[name]['note_events']
            all_piano_notes.extend(
                note_event['midi_note'] - begin_note for note_event in note_events)

        chroma_counts = _count_chroma(all_piano_notes)
        chroma_frequency = chroma_counts / np.sum(chroma_counts)

        ax.set_title(composer)
        ax.set_ylabel('Frequency')
        ax.set_ylim(0, 0.15)
        ax.yaxis.grid(color='k', linestyle='--', linewidth=0.3)
        ax.bar(np.arange(chroma_num), chroma_frequency, align='center')
        ax.xaxis.set_ticks(np.arange(chroma_num))
        ax.xaxis.set_ticklabels(chroma_names, fontsize=12)

    plt.tight_layout()
    plt.savefig(fig_path)
    print('Save fig to {}'.format(fig_path))
def plot_selected_composers_intervals(args):
    """Plot the distribution of note intervals of six selected composers.

    For each composer the notes of all their pieces in
    '<workspace>/all_music_events.pkl' are concatenated and the differences
    between consecutive notes, from -11 to 11 semitones, are counted and
    normalized to frequencies. The 2 x 3 grid of bar plots is saved to
    'results/selected_composers_intervals.pdf'.

    Args:
      args: namespace with attribute `workspace`, str
    """

    def _count_interval(piano_notes):
        """Count the intervals between consecutive notes.

        Args:
          piano_notes: list of piano key indexes

        Returns:
          counts: (23,), counts[k + 11] is the number of intervals of k
            semitones, -11 <= k <= 11; larger intervals are not counted
        """
        notes = np.asarray(piano_notes, dtype=np.int64)
        diff = np.diff(notes)
        diff = diff[(diff >= -11) & (diff <= 11)]
        return np.bincount(diff + 11, minlength=23).astype(np.float64)

    # Arguments & parameters
    workspace = args.workspace

    # Paths
    all_music_events_path = os.path.join(workspace, 'all_music_events.pkl')
    fig_path = 'results/selected_composers_intervals.pdf'
    os.makedirs(os.path.dirname(fig_path), exist_ok=True)

    # The composers are fixed, so statistics.pkl is not needed here.
    composers = ['Bach, Johann Sebastian', 'Mozart, Wolfgang Amadeus',
        'Beethoven, Ludwig van', 'Chopin, Frédéric', 'Liszt, Franz', 'Debussy, Claude']

    # Load events
    with open(all_music_events_path, 'rb') as f:
        all_music_events = pickle.load(f)
    print('Load finish.')

    # All music piece names
    all_names = sorted(all_music_events.keys())

    intervals_num = 23  # From downward major 7-th to upward major 7-th
    # Every second label starts with a line break so that neighbouring labels
    # do not overlap.
    labels = ['-11', '\n-10', '-9', '\n-8', '-7', '\n-6', '-5', '\n-4', '-3', '\n-2', '-1', '\n0', '1', '\n2', '3', '\n4', '5', '\n6', '7', '\n8', '9', '\n10', '11']

    fig, axs = plt.subplots(2, 3, sharex=False, figsize=(10, 4))

    for j, composer in enumerate(composers):
        ax = axs[j // 3, j % 3]

        # Select the music pieces of a specific composer: the first two ', '
        # separated fields of a piece name are the surname and the firstname.
        # Comparing the slices also tolerates names with fewer than two fields.
        composer_key = composer.split(', ')[0 : 2]
        names = [name for name in all_names if name.split(', ')[0 : 2] == composer_key]

        # Collect all notes of a specific composer
        # NOTE(review): the pieces are concatenated, so the interval between
        # the last note of a piece and the first note of the next one is
        # counted as well.
        all_piano_notes = []
        for name in names:
            note_events = all_music_events[name]['note_events']
            all_piano_notes.extend(
                note_event['midi_note'] - begin_note for note_event in note_events)

        interval_counts = _count_interval(all_piano_notes)
        interval_frequency = interval_counts / np.sum(interval_counts)

        ax.set_title(composer, fontsize=12)
        ax.set_xlim(-1, intervals_num)
        ax.set_ylim(0, 0.15)
        ax.yaxis.grid(color='k', linestyle='--', linewidth=0.3)
        ax.bar(np.arange(intervals_num), interval_frequency, align='center')
        ax.xaxis.set_ticks(np.arange(intervals_num))
        # The rotation must be a number, not the string '0'.
        ax.xaxis.set_ticklabels(labels, fontsize=11, rotation=0)

        if j in [0, 3]:
            # Left column: label the shared quantity once per row
            ax.set_ylabel('Frequency', fontsize=12)
        else:
            # Other columns: hide the y tick labels. This takes a bool; the
            # string 'off' is truthy and would keep the labels visible.
            ax.tick_params(labelleft=False)

    plt.tight_layout()
    plt.savefig(fig_path)
    print('Save fig to {}'.format(fig_path))
def plot_selected_composers_chords(args):
    """Plot the most frequent chords of six selected composers.

    For every selected composer, the note events of each of their pieces are
    grouped into chords (notes whose onsets lie within 50 ms of the first
    note of the group). Each chord is reduced to a pitch-class set relative
    to its bass note, inversions are merged into the most frequent inversion,
    and the relative frequency of the most common chords with `n_chords`
    distinct pitch classes is drawn as a bar chart in a 2 x 3 grid.

    Args:
      args: namespace with attributes
        workspace: str, directory containing 'all_music_events.pkl'
        n_chords: int, number of distinct pitch classes of the chords to plot

    Outputs:
      Saves the figure to 'results/selected_composers_chords_{n_chords}.pdf'
      (relative to the current working directory) and prints that path.
    """

    def _chord_to_key(pitch_classes):
        """Serialize pitch classes to a canonical key, e.g., '{0, 4, 7}'.

        The key is built by hand (sorted, plain integers) so that it does not
        depend on set iteration order or on how NumPy scalars are printed.
        """
        return '{' + ', '.join(str(p) for p in sorted(pitch_classes)) + '}'

    def _key_to_chord(key):
        """Parse a key such as '{0, 4, 7}' back to a list of ints, [0, 4, 7]."""
        return [int(token) for token in key.strip('{}').split(',')]

    def _count_chord(note_events):
        """Count chords of one piece. If multiple notes are within 50 ms,
        then we call them a chord. The chord is transposed so that its bass
        note is C. Then, all notes are moved to a same octave.

        Args:
          note_events: list, e.g., [
            {'midi_note': 77, 'onset_time': 177.96, 'offset_time': 180.29, 'velocity': 103},
            {'midi_note': 29, 'onset_time': 178.64, 'offset_time': 180.34, 'velocity': 87},
            ...]

        Returns:
          chord_dict, dict, e.g.,
            {'{0}': 5872, '{0, 7}': 386, '{0, 2, 4, 5, 9, 10}': 2, '{0, 3}': 777, ...}
        """
        chord_dict = {}
        delta_time = 0.05   # Notes within this many seconds of the anchor form a chord
        chord = []          # MIDI notes of the chord currently being collected
        anchor_time = None  # Onset time of the first note of the current chord

        def _collect(notes):
            bass = min(notes)
            # Transpose so that the bass note is C, then fold into one octave
            key = _chord_to_key({(note - bass) % 12 for note in notes})
            chord_dict[key] = chord_dict.get(key, 0) + 1

        # Grouping by onset proximity only makes sense in time order, so sort
        # first (a no-op if the events are already ordered by onset).
        for event in sorted(note_events, key=lambda e: e['onset_time']):
            if chord and event['onset_time'] - anchor_time > delta_time:
                _collect(chord)
                chord = []

            if not chord:
                anchor_time = event['onset_time']

            chord.append(event['midi_note'])

        # The last chord has no following note to trigger its collection
        if chord:
            _collect(chord)

        return chord_dict

    def sort_dict(chord_dict):
        """Get chord list by their descending chord number.

        Args:
          chord_dict: dict, e.g.,
            {'{0}': 5872, '{0, 7}': 386, '{0, 2, 4, 5, 9, 10}': 2, '{0, 3}': 777, ...}

        Returns:
          sorted_list: list, e.g.,
            [('{0}', 10497), ('{0, 3}', 1517), ('{0, 4}', 1302), ...]
        """
        return sorted(chord_dict.items(), key=lambda e: e[1], reverse=True)

    def _get_inversion_list(key):
        """Get the keys of all distinct inversions of a chord, itself included.

        Symmetric chords (e.g., '{0, 6}' or '{0, 4, 8}') map to the same key
        for several roots; duplicates are dropped so that each inversion is
        counted only once when merging.
        """
        chord = _key_to_chord(key)
        inversions = []

        for root in chord:
            inversion = _chord_to_key({(note - root) % 12 for note in chord})
            if inversion not in inversions:
                inversions.append(inversion)

        return inversions

    def _merge_inversion(sorted_list, chord_dict):
        """Merge inversions of a chord into its most frequent inversion.

        Args:
          sorted_list: list sorted by descending count, e.g.,
            [('{0}', 10497), ('{0, 3}', 1517), ('{0, 4}', 1302), ...]
          chord_dict: dict of the same counts, used for lookup by key

        Returns:
          merged_chord_dict: dict, e.g.,
            {'{0}': 10497, '{0, 3}': 2763, '{0, 4}': 2182, ...}
        """
        merged_chord_dict = {}

        for key, _ in sorted_list:
            inversions = _get_inversion_list(key)

            # A more frequent inversion has already absorbed this chord
            if any(inversion in merged_chord_dict for inversion in inversions):
                continue

            merged_chord_dict[key] = sum(
                chord_dict.get(inversion, 0) for inversion in inversions)

        return merged_chord_dict

    def _get_chords_with_n_notes(sorted_list, n_chord):
        """Keep only the chords that have exactly n_chord pitch classes."""
        return [e for e in sorted_list if len(_key_to_chord(e[0])) == n_chord]

    def _normalize_list(input_list):
        """Histogram to frequency. An empty list is returned unchanged."""
        total = sum(e[1] for e in input_list)
        return [(e[0], e[1] / total) for e in input_list]

    # Arguments & parameters
    workspace = args.workspace
    n_chords = args.n_chords
    top_chords = 6  # Number of chords to plot

    # Paths
    all_music_events_path = os.path.join(workspace, 'all_music_events.pkl')
    fig_path = 'results/selected_composers_chords_{}.pdf'.format(n_chords)
    os.makedirs(os.path.dirname(fig_path), exist_ok=True)

    composers = ['Bach, Johann Sebastian', 'Mozart, Wolfgang Amadeus',
        'Beethoven, Ludwig van', 'Chopin, Frédéric', 'Liszt, Franz', 'Debussy, Claude']

    # Load events.
    # NOTE: unpickling can execute arbitrary code; only load workspace files
    # that were generated locally.
    with open(all_music_events_path, 'rb') as f:
        all_music_events = pickle.load(f)
    print('Load finish.')

    # All music piece names
    all_names = sorted(all_music_events.keys())

    # Chords with more notes have longer tick labels and need a taller figure
    figsize = (10, 4.3) if n_chords >= 4 else (10, 4)

    fig, axs = plt.subplots(2, 3, sharex=False, figsize=figsize)

    for j, composer in enumerate(composers):
        ax = axs[j // 3, j % 3]

        # Select the music pieces of a specific composer. Piece names start
        # with 'Surname, Firstname'; names that do not follow this pattern
        # simply never match.
        composer_key = composer.split(', ')[0 : 2]
        names = [name for name in all_names
            if name.split(', ')[0 : 2] == composer_key]

        # Count chords piece by piece and accumulate. Pieces are not
        # concatenated first: onset times are presumably relative to each
        # piece's own start, so notes of different pieces must not be
        # grouped into a common chord.
        chord_dict = {}
        for name in names:
            piece_chord_dict = _count_chord(all_music_events[name]['note_events'])
            for key, count in piece_chord_dict.items():
                chord_dict[key] = chord_dict.get(key, 0) + count
        # E.g., {'{0}': 5872, '{0, 7}': 386, '{0, 2, 4, 5, 9, 10}': 2, '{0, 3}': 777, ...}

        # Sort the number of chords by descending order
        sorted_list = sort_dict(chord_dict)
        # E.g., [('{0}', 10497), ('{0, 3}', 1517), ('{0, 4}', 1302), ...]

        # Merge inversion of chords to the most frequent inversion
        chord_dict = _merge_inversion(sorted_list, chord_dict)
        # E.g., {'{0}': 10497, '{0, 3}': 2763, '{0, 4}': 2182, ...}

        # Sort the number of chords by descending order
        sorted_list = sort_dict(chord_dict)
        # E.g., [('{0}', 10497), ('{0, 3}', 2763), ('{0, 4}', 2182), ...]

        # Get chords with n notes
        sorted_list = _get_chords_with_n_notes(sorted_list, n_chords)

        # Normalize to frequency
        sorted_list = _normalize_list(sorted_list)

        # Choose top chords
        sorted_list = sorted_list[0 : top_chords]

        ids = [e[0] for e in sorted_list]
        counts = [e[1] for e in sorted_list]

        ax.set_title(composer, fontsize=12)
        ax.bar(np.arange(len(counts)), counts, align='center')
        ax.set_ylim(0, 0.4)
        ax.yaxis.grid(color='k', linestyle='--', linewidth=0.3)
        # One tick per plotted bar; there may be fewer than top_chords chords
        ax.xaxis.set_ticks(np.arange(len(ids)))
        ax.xaxis.set_ticklabels(ids, fontsize=8, rotation=90)

        if j in [0, 3]:
            ax.set_ylabel('Frequency', fontsize=12)

        if j in [1, 2, 4, 5]:
            # Boolean flag: the legacy string 'off' is a truthy value and
            # does not hide the labels in current matplotlib.
            ax.tick_params(labelleft=False)

    # Keyword form: newer matplotlib does not accept these positionally
    plt.tight_layout(pad=0, h_pad=1, w_pad=1)
    plt.savefig(fig_path)
    print('Save fig to {}'.format(fig_path))
def count_notes(piano_notes):
    """Build an 88-bin histogram of piano note indexes.

    Args:
      piano_notes: sequence supporting `.count()`, e.g., [0, 39, 39, 87, ...]

    Returns:
      list of 88 ints; element k is how many times k occurs in piano_notes.
      Values outside 0..87 are not counted in any bin.
    """
    return [piano_notes.count(key_index) for key_index in range(88)]
if __name__ == '__main__':
    # Command-line entry point: one sub-command per statistics / plotting
    # routine, selected through the `mode` destination.
    parser = argparse.ArgumentParser(description='Example of parser. ')
    subparsers = parser.add_subparsers(dest='mode')

    # Sub-commands in registration order. Every one requires --workspace.
    mode_names = (
        'meta_info',
        'plot_composer_works_num',
        'plot_composer_durations',
        'calculate_music_events_from_midi',
        'plot_note_histogram',
        'plot_mean_std_notes',
        'plot_notes_per_second_mean_std',
        'plot_selected_composers_note_histogram',
        'plot_selected_composers_chroma',
        'plot_selected_composers_intervals',
        'plot_selected_composers_chords',
        'note_intervals',
    )

    for mode_name in mode_names:
        mode_parser = subparsers.add_parser(mode_name)
        mode_parser.add_argument('--workspace', type=str, required=True)

        # The chord plot additionally needs the chord size
        if mode_name == 'plot_selected_composers_chords':
            mode_parser.add_argument('--n_chords', type=int, required=True)

    args = parser.parse_args()

    # Mode -> action. Lambdas keep the function names unresolved until the
    # selected action actually runs.
    # NOTE(review): 'plot_pedals_per_piece_mean_std' has no sub-parser
    # registered in this block, so argparse never yields that mode.
    handlers = {
        'meta_info': lambda: meta_info(args),
        'plot_composer_works_num': lambda: plot_composer_works_num(args),
        'plot_composer_durations': lambda: plot_composer_durations(args),
        'calculate_music_events_from_midi': lambda: calculate_music_events_from_midi(args),
        'plot_note_histogram': lambda: plot_note_histogram(args),
        'plot_mean_std_notes': lambda: plot_mean_std_notes(args),
        'plot_notes_per_second_mean_std': lambda: plot_notes_per_second_mean_std(args),
        'plot_pedals_per_piece_mean_std': lambda: plot_pedals_per_piece_mean_std(args),
        'plot_selected_composers_note_histogram': lambda: plot_selected_composers_note_histogram(args),
        'plot_selected_composers_chroma': lambda: plot_selected_composers_chroma(args),
        'plot_selected_composers_intervals': lambda: plot_selected_composers_intervals(args),
        'plot_selected_composers_chords': lambda: plot_selected_composers_chords(args),
        'note_intervals': lambda: note_intervals(args),
    }

    # No sub-command given (mode is None) or an unknown mode
    if args.mode not in handlers:
        raise Exception('Incorrect argument!')

    handlers[args.mode]()
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/garlong/giant-midi-piano.git
git@gitee.com:garlong/giant-midi-piano.git
garlong
giant-midi-piano
GiantMIDI-Piano
master

搜索帮助

0d507c66 1850385 C8b1a773 1850385