1 Star 0 Fork 0

zhoub86/LPCNet

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
process.sh 11.45 KB
一键复制 编辑 原始数据 按行查看 历史
#!/bin/bash -x
# process.sh
# David Rowe Jan 2019
#
: '
1. Process an input set of wave files using LPCNet under a variety of conditions.
2. Name output files to make them convenient to listen to in a file manager.
3. Generate a HTML table of samples for convenient replay on the web.
4. Generate a bunch of other HTML files and PNGs.
usage: ./process.sh [--lite] OutPath [OutPathB]
./process.sh ~/tmp/lpcnet_out
To generate new samples in OutPathA and compare them with those from a previous
run of this script in OutPathB:
./process.sh OutPathA OutPathB
--lite generates a much smaller page with just the basic LPCNet model case
'
# command line arguments
#
# --lite may appear anywhere on the command line. Every other argument is
# positional: the first is the output path, the optional second is the output
# path of a previous run to compare against.
#
# Sets: lite (0/1), OUTPATH, and OUTPATHB (only when two paths are given).
lite=0
positional=()
for arg in "$@"; do
  case "$arg" in
    --lite)
      lite=1
      ;;
    *)
      # Collect instead of calling "shift" here: shift always drops the FIRST
      # positional, which discards the output path when --lite comes after it.
      positional+=("$arg")
      ;;
  esac
done

# Checked after option removal so "./process.sh --lite" on its own is rejected
# rather than running with an empty output path.
if [ "${#positional[@]}" -lt 1 ]; then
  echo "usage: ./process.sh [--lite] /output/path/1 [/output/path/2]"
  echo " $ ./process.sh ~/tmp/lpcnet_outA"
  exit 1
fi

set -- "${positional[@]}"
OUTPATH=$1
if [ $# -eq 2 ]; then
  OUTPATHB=$2
fi
# set these paths to suit your system
CODEC2_PATH=$HOME/codec2-dev/build_linux/src
WAVIN_PATH=$HOME/Desktop/deep/quant
WAVOUT_PATH=$OUTPATH/wav
PATH=$PATH:$CODEC2_PATH
STATS=$OUTPATH/stats.txt
HTML=$OUTPATH/index.html
PNG_PATH=$OUTPATH/png
F32_PATH=$OUTPATH/f32
SV_PATH=$OUTPATH/sv
WAV_FILES="all birch canadian glue oak separately wanted wia"

# check we can find wave files
#
# Prints one "<path> Not found" line for every sample named in WAV_FILES that
# has no matching .wav file under WAVIN_PATH. Reporting only; never aborts.
function check_wav_inputs {
  local f
  # WAV_FILES is deliberately unquoted: it is a space separated list.
  for f in $WAV_FILES; do
    if [ ! -e "$WAVIN_PATH/$f.wav" ]; then
      echo "$WAVIN_PATH/$f.wav Not found"
    fi
  done
}
check_wav_inputs

# check we can find codec 2 tools
if [ ! -e "$CODEC2_PATH/c2enc" ]; then
  echo "$CODEC2_PATH/c2enc not found"
fi
#
# OK lets start processing ------------------------------------------------
#
# All path expansions are quoted so that an output or input location that
# contains spaces is handled as a single argument.
mkdir -p "$F32_PATH"
mkdir -p "$SV_PATH"
mkdir -p "$WAVOUT_PATH"
mkdir -p "$PNG_PATH"
# stats are appended to (2>>) by later stages, so start from a clean file
rm -f "$STATS"
# file name prefixes of the VQ stage tables passed to quant_feat -q
vq=pred_v2_stage
vq2=split_stage

# cp in originals
for f in $WAV_FILES; do
  cp "$WAVIN_PATH/$f.wav" "$WAVOUT_PATH/${f}_0_orig.wav"
done

# Unquantised, baseline analysis-synthesis model, 10ms updates
for f in $WAV_FILES; do
  sox "$WAVIN_PATH/$f.wav" -t raw - \
    | ./dump_data --test --c2pitch - - \
    | ./test_lpcnet - - \
    | sox -r 16000 -t .s16 -c 1 - "$WAVOUT_PATH/${f}_1_uq.wav"
done
# Everything below is skipped with --lite. Each loop runs every sample through
# one quantiser configuration and writes "<sample>_<n>_<name>.wav" to
# WAVOUT_PATH. Where quant_feat is given a label (-l), its stderr is appended
# to $STATS; where -s is given, its output file goes to SV_PATH.
# Path expansions are quoted so locations containing spaces work.
if [ "$lite" -eq 0 ]; then

  # 3dB uniform quantiser, 10ms updates
  for f in $WAV_FILES; do
    label=$(printf "3dB %-10s" "$f")
    sox "$WAVIN_PATH/$f.wav" -t raw - \
      | ./dump_data --test --c2pitch - - \
      | ./quant_feat -l "$label" -d 1 --uniform 3 2>>"$STATS" \
      | ./test_lpcnet - - \
      | sox -r 16000 -t .s16 -c 1 - "$WAVOUT_PATH/${f}_2_3dB.wav"
  done

  # decimate features to 20ms updates, then linearly interpolate back up to 10ms updates
  for f in $WAV_FILES; do
    sox "$WAVIN_PATH/$f.wav" -t raw - \
      | ./dump_data --test --c2pitch - - \
      | ./quant_feat -d 2 \
      | ./test_lpcnet - - \
      | sox -r 16000 -t .s16 -c 1 - "$WAVOUT_PATH/${f}_3_20ms.wav"
  done

  # decimate features to 20ms updates, then linearly interpolate back up to 10ms updates, incl pitch + voicing quant
  for f in $WAV_FILES; do
    sox "$WAVIN_PATH/$f.wav" -t raw - \
      | ./dump_data --test --c2pitch - - \
      | ./quant_feat -d 2 -o 6 \
      | ./test_lpcnet - - \
      | sox -r 16000 -t .s16 -c 1 - "$WAVOUT_PATH/${f}_4_20ms_pq.wav"
  done

  # 33 bit 3 stage VQ searched with mbest algorithm, 20ms updates
  for f in $WAV_FILES; do
    label=$(printf "33bit_20ms %-10s" "$f")
    sox "$WAVIN_PATH/$f.wav" -t raw - \
      | ./dump_data --test --c2pitch - - \
      | ./quant_feat -l "$label" -d 2 -o 6 -w --mbest 5 \
          -q "${vq}1.f32,${vq}2.f32,${vq}3.f32" \
          -s "$SV_PATH/${f}_5_33bit_20ms.txt" 2>>"$STATS" \
      | ./test_lpcnet - - \
      | sox -r 16000 -t .s16 -c 1 - "$WAVOUT_PATH/${f}_5_33bit_20ms.wav"
  done

  # 33 bit 3 stage VQ searched with mbest algorithm, 30ms updates
  for f in $WAV_FILES; do
    label=$(printf "33bit_30ms %-10s" "$f")
    sox "$WAVIN_PATH/$f.wav" -t raw - \
      | ./dump_data --test --c2pitch - - \
      | ./quant_feat -l "$label" -d 3 -o 6 -w --mbest 5 \
          -q "${vq}1.f32,${vq}2.f32,${vq}3.f32" \
          -s "$SV_PATH/${f}_6_33bit_30ms.txt" 2>>"$STATS" \
      | ./test_lpcnet - - \
      | sox -r 16000 -t .s16 -c 1 - "$WAVOUT_PATH/${f}_6_33bit_30ms.wav"
  done

  # 44 bit 4 stage VQ searched with mbest algorithm, 30ms updates
  for f in $WAV_FILES; do
    label=$(printf "44bit_30ms %-10s" "$f")
    sox "$WAVIN_PATH/$f.wav" -t raw - \
      | ./dump_data --test --c2pitch - - \
      | ./quant_feat -l "$label" -d 3 -o 6 -w --mbest 5 \
          -q "${vq}1.f32,${vq}2.f32,${vq}3.f32,${vq}4.f32" \
          -s "$SV_PATH/${f}_7_44bit_30ms.txt" 2>>"$STATS" \
      | ./test_lpcnet - - \
      | sox -r 16000 -t .s16 -c 1 - "$WAVOUT_PATH/${f}_7_44bit_30ms.wav"
  done

  # non-predictive (direct) 44 bit 4 stage split VQ searched with mbest algorithm, 30ms updates
  for f in $WAV_FILES; do
    label=$(printf "44bit_sp_30ms %-10s" "$f")
    sox "$WAVIN_PATH/$f.wav" -t raw - \
      | ./dump_data --test --c2pitch - - \
      | ./quant_feat -l "$label" -d 3 -o 6 -i -p 0 --mbest 5 \
          -q "${vq2}1.f32,${vq2}2.f32,${vq2}3.f32,${vq2}4.f32" \
          -s "$SV_PATH/${f}_8_44bit_sp_30ms.txt" 2>>"$STATS" \
      | ./test_lpcnet - - \
      | sox -r 16000 -t .s16 -c 1 - "$WAVOUT_PATH/${f}_8_44bit_sp_30ms.wav"
  done

fi # ... if [ $lite -eq 0 ] ...
#
# Create a HTML table of samples ----------------------------------------------------
#

# Start $HTML afresh with the document preamble and, unless --lite was given,
# append the glossary table.
# Globals: HTML (file written), lite (read).
function write_html_header {
  # ">" on purpose: this is the first write, it truncates any previous page.
  cat << EOF > "$HTML"
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
<html>
<head>
<title>LPCNet Samples</title>
</head>
<body>
EOF
  if [ "$lite" -eq 0 ]; then
    # ">>" is required here: a plain ">" would wipe the preamble written above
    # and leave the page without its DOCTYPE, <html>, <head> and <body> tags.
    cat << EOF >> "$HTML"
<table>
<col width="10%">
<col width="70%">
<caption>Glossary</caption>
<tr><th align="left">Term</th><th align="left">Description</th></tr>
<tr><td>Orig</td><td>Original source input speech</td></tr>
<tr><td>UQ</td><td>Baseline LPCNet synthesis using unquantised features</td></tr>
<tr><td>3dB</td><td>Cesptral features uniform quantiser with 3dB steps</td></tr>
<tr>
<td>20ms</td>
<td>Cesptral features decimated to 20ms frame rate, linear interpolation back to 10ms</td>
</tr>
<tr>
<td>20ms_pq</td>
<td>As above but pitch quantised to 6 bits, pitch gain to 2 bits</td>
</tr>
<tr>
<td>33bit_20ms</td>
<td>3 stage VQ of prediction error, 11 bits/stage, at 20ms frame rate, (33+8)/0.02 = 2050 bits/s</td>
</tr>
<tr>
<td>33bit_30ms</td>
<td>Same 33 bit VQ, but decimated down to 30ms rate, (33+8)/0.03 = 1367 bits/s</td>
</tr>
<tr>
<td>44bit_30ms</td>
<td>4 stage VQ, at 30ms update rate, (44+8)/0.03 = 1733 bits/s</td>
</tr>
<tr>
<td>44bit_sp_30ms</td>
<td>Direct (non predictive) 4 stage split VQ, at 30ms update rate, (44+8)/0.03 = 1733 bits/s. First 3 stages are 18 elements wide, last stage is just 12. We quantise log magnitudes (Ly) rather than Ceptrals (dct(Ly)). Targeted at HF radio channel where predictive schemes perform poorly due to high bit error/packet error rate</td>
</tr>
</table>
<p>
EOF
  fi
}
write_html_header
# Append the heading row of a results table to $HTML.
#
# The column names are derived from the output files of the first sample in
# WAV_FILES: "<sample>_<digit>_<name>.wav" yields the heading "<name>".
# Globals: WAV_FILES, WAVOUT_PATH (read), HTML (appended to),
#          headings (SET - callers iterate over it after this returns).
function heading_row {
  local w path h
  # first word of the space separated sample list
  read -r w _ <<< "$WAV_FILES"
  # Glob instead of parsing ls output; a pattern with no match stays literal,
  # hence the -e guard.
  headings=$(
    for path in "$WAVOUT_PATH/$w"*; do
      [ -e "$path" ] || continue
      printf '%s\n' "$path"
    done | sed -r -e "s/.*$w.[[:digit:]]_//" -e 's/\.wav$//'
  )
  # Single quoted format so the attribute quotes reach the page; nesting
  # double quotes inside a double quoted format silently drops them.
  printf '<tr>\n <th align="left">Sample</th>\n' >> "$HTML"
  # headings is deliberately unquoted: one word per column
  for h in $headings; do
    printf ' <th>%s</th>\n' "$h" >> "$HTML"
  done
  printf '</tr>\n' >> "$HTML"
}
# Samples table: one row per wave file, one "play" link per processed output.
# When a second output path (OUTPATHB) was given, each cell also links to the
# file of the same name from that run.
printf '<table>\n' >> "$HTML"
printf '<caption>Samples</caption>\n' >> "$HTML"
heading_row
for f in $WAV_FILES; do
  printf '<tr>\n <td>%s</td>\n' "$f" >> "$HTML"
  # Glob rather than parse ls output; an unmatched pattern stays literal, so
  # skip anything that does not exist.
  for w in "$WAVOUT_PATH/$f"*; do
    [ -e "$w" ] || continue
    b=$(basename "$w")
    # Single quoted formats keep the attribute quotes in the generated HTML.
    if [ -z "${OUTPATHB}" ]; then
      # no comparison
      printf ' <td align="center"><a href="wav/%s">play</a></td>\n' "$b" >> "$HTML"
    else
      # compare with another process.sh run
      printf ' <td align="center"><a href="wav/%s">play</a> (<a href="%s">playB</a>) </td>\n' \
        "$b" "$OUTPATHB/wav/$b" >> "$HTML"
    fi
  done
  printf '</tr>\n' >> "$HTML"
done
printf '</table><p>\n' >> "$HTML"
# HTML table of results ---------------------------------------------------------
#
# Append one table to $HTML with a row per sample and a column per heading.
# Arguments:
#   $1 - what to show in each cell:
#          var | sd   value extracted from the matching RESULTS line in $STATS
#                     ("-" when there is none)
#          outliers   bar chart PNG rendered by octave from the RESULTS line
#          quant      plot rendered by src/plot_speech_quant.m from the
#                     matching file in SV_PATH
#   $2 - table caption
# Globals: WAV_FILES, STATS, PNG_PATH, SV_PATH, F32_PATH (read),
#          HTML (appended to), headings (set by heading_row).
function table_of_values {
  local kind=$1 caption=$2
  # Locals so the loops below do not clobber the caller's f, h, b, ...
  local f h s outliers png cmd b t
  local -a sv_files

  printf '<table>\n' >> "$HTML"
  printf '<caption>%s</caption>\n' "$caption" >> "$HTML"
  heading_row

  # for each wave file, create a row
  for f in $WAV_FILES; do
    printf '<tr>\n <td>%s</td>\n' "$f" >> "$HTML"
    for h in $headings; do
      case "$kind" in
        outliers)
          outliers=$(sed -n "s/RESULTS $h $f.*dB = \(.*\)/\1/p" "$STATS")
          if [ -n "$outliers" ]; then
            png=$PNG_PATH/${f}_${h}.png
            cmd="graphics_toolkit ('gnuplot'); o=[$outliers]; bar([1-sum(o) o],'hist'); axis([0 4 0 1]); print(\"$png\",'-dpng','-S120,120')"
            octave --no-gui -qf --eval "$cmd"
            b=$(basename "$png")
            printf ' <td align=center><img src="png/%s" ></img></td>\n' "$b" >> "$HTML"
          else
            printf ' <td></td>\n' >> "$HTML"
          fi
          ;;
        quant)
          # The single digit sequence number in the file name is not known
          # here, so match it with "?". Expanding into an array means more
          # than one match cannot break the test; the first match is used.
          sv_files=("$SV_PATH/${f}"_?_"${h}.txt")
          if [ -e "${sv_files[0]}" ]; then
            png=$PNG_PATH/${f}_${h}_quant.png
            # frame period in ms, taken from the heading (e.g. 33bit_20ms -> 20)
            t=$(printf '%s\n' "$h" | sed -n "s/.*_\(.*\)ms/\1/p")
            octave --no-gui -p src -qf src/plot_speech_quant.m \
              "$F32_PATH/$f.raw" "${sv_files[0]}" "$png" ${t:+"$t"}
            b=$(basename "$png")
            printf ' <td align=center><a href="png/%s"><img width=100 height=100 src="png/%s" ></img></a></td>\n' \
              "$b" "$b" >> "$HTML"
          else
            printf ' <td></td>\n' >> "$HTML"
          fi
          ;;
        *)
          # extract the requested statistic from the stats file
          s=""
          case "$kind" in
            var) s=$(sed -n "s/RESULTS $h $f.*var: \(.*\) sd.*/\1/p" "$STATS") ;;
            sd) s=$(sed -n "s/RESULTS $h $f.*sd: \(.*\) n.*/\1/p" "$STATS") ;;
          esac
          if [ -z "$s" ]; then
            s="-"
          fi
          # "$s" quoted: an unquoted multi-word value would repeat the format
          # and emit extra cells.
          printf ' <td align="center">%s</td>\n' "$s" >> "$HTML"
          ;;
      esac
    done
    printf '</tr>\n' >> "$HTML"
  done
  printf '</table><p>\n' >> "$HTML"
}
# Results tables, pitch contour plots and quantiser error plots are only
# generated for the full (non --lite) page.
if [ "$lite" -eq 0 ]; then
  table_of_values "var" "Variance"
  table_of_values "sd" "Standard Deviation"
  table_of_values "outliers" "Outliers"

  #
  # Table of Speech/Pitch contours ----------------------------------------------
  #
  printf '<table>\n' >> "$HTML"
  printf '<caption>Pitch Countours</caption>\n' >> "$HTML"

  # heading row
  printf '<tr>\n' >> "$HTML"
  for f in $WAV_FILES; do
    printf ' <th>%s</th>\n' "$f" >> "$HTML"
  done
  printf '</tr>\n' >> "$HTML"

  # Manually chosen pitch axis limit per sample, to make plots easier to read.
  # Keyed by sample name (needs bash 4+) so that adding to or reordering
  # WAV_FILES cannot pair a sample with the wrong limit. A sample with no
  # entry gets no limit argument at all.
  declare -A pitch_max=(
    [all]=400 [birch]=200 [canadian]=200 [glue]=400
    [oak]=400 [separately]=200 [wanted]=400 [wia]=200
  )

  # row of links to PNGs
  printf '<tr>\n' >> "$HTML"
  for f in $WAV_FILES; do
    # the .raw file written here is also read later by the "quant" table
    sox "$WAVIN_PATH/$f.wav" -t raw "$F32_PATH/$f.raw"
    ./dump_data --test --c2pitch "$F32_PATH/$f.raw" "$F32_PATH/${f}_c2.f32"
    limit=${pitch_max[$f]:-}
    octave --no-gui -p src -qf src/plot_speech_pitch.m \
      "$F32_PATH/$f.raw" "$F32_PATH/${f}_c2.f32" - "$PNG_PATH/${f}_pitch.png" ${limit:+"$limit"}
    b=${f}_pitch.png
    # single quoted format keeps the attribute quotes in the generated HTML
    printf ' <td align="center"><a href="png/%s"><img width=100 height=100 src="png/%s" /></a></td>\n' \
      "$b" "$b" >> "$HTML"
  done
  printf '</tr>\n' >> "$HTML"
  printf '</table><p>\n' >> "$HTML"

  table_of_values "quant" "Quantiser Error Countours"
fi

# close the document opened at the top of the page
printf '</body>\n</html>\n' >> "$HTML"
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/zhoub86/LPCNet.git
git@gitee.com:zhoub86/LPCNet.git
zhoub86
LPCNet
LPCNet
master

搜索帮助

0d507c66 1850385 C8b1a773 1850385